2015-03-23 17:02:11 -07:00
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
2016-08-20 22:05:47 -07:00
// Simplifications that apply to all backend architectures. As an example, this
// Go source code
//
// y := 0 * x
//
// can be translated into y := 0 without losing any information, which saves a
// pointless multiplication instruction. Other .rules files in this directory
// (for example AMD64.rules) contain rules specific to the architecture in the
// filename. The rules here apply to every architecture.
//
// The code for parsing this file lives in rulegen.go; this file generates
// ssa/rewritegeneric.go.
2015-05-28 16:45:33 -07:00
// values are specified using the following format:
2015-06-11 21:29:25 -07:00
// (op <type> [auxint] {aux} arg0 arg1 ...)
2016-03-29 16:39:53 -07:00
// the type, aux, and auxint fields are optional
2015-05-28 16:45:33 -07:00
// on the matching side
2015-06-11 21:29:25 -07:00
// - the type, aux, and auxint fields must match if they are specified.
2016-03-29 16:39:53 -07:00
// - the first occurrence of a variable defines that variable. Subsequent
// uses must match (be == to) the first use.
// - v is defined to be the value matched.
// - an additional conditional can be provided after the match pattern with "&&".
2015-05-28 16:45:33 -07:00
// on the generated side
// - the type of the top-level expression is the same as the one on the left-hand side.
2016-04-26 12:08:31 -07:00
// - the type of any subexpressions must be specified explicitly (or
// be specified in the op's type field).
2015-06-11 21:29:25 -07:00
// - auxint will be 0 if not specified.
2015-05-28 16:45:33 -07:00
// - aux will be nil if not specified.
// blocks are specified using the following format:
// (kind controlvalue succ0 succ1 ...)
// controlvalue must be "nil" or a value expression
// succ* fields must be variables
// For now, the generated successors must be a permutation of the matched successors.
2015-03-23 17:02:11 -07:00
// constant folding
2020-04-10 21:38:49 -07:00
(Trunc16to8 (Const16 [c])) => (Const8 [int8(c)])
(Trunc32to8 (Const32 [c])) => (Const8 [int8(c)])
(Trunc32to16 (Const32 [c])) => (Const16 [int16(c)])
(Trunc64to8 (Const64 [c])) => (Const8 [int8(c)])
(Trunc64to16 (Const64 [c])) => (Const16 [int16(c)])
(Trunc64to32 (Const64 [c])) => (Const32 [int32(c)])
(Cvt64Fto32F (Const64F [c])) => (Const32F [float32(c)])
(Cvt32Fto64F (Const32F [c])) => (Const64F [float64(c)])
(Cvt32to32F (Const32 [c])) => (Const32F [float32(c)])
(Cvt32to64F (Const32 [c])) => (Const64F [float64(c)])
(Cvt64to32F (Const64 [c])) => (Const32F [float32(c)])
(Cvt64to64F (Const64 [c])) => (Const64F [float64(c)])
(Cvt32Fto32 (Const32F [c])) => (Const32 [int32(c)])
(Cvt32Fto64 (Const32F [c])) => (Const64 [int64(c)])
(Cvt64Fto32 (Const64F [c])) => (Const32 [int32(c)])
(Cvt64Fto64 (Const64F [c])) => (Const64 [int64(c)])
(Round32F x:(Const32F)) => x
(Round64F x:(Const64F)) => x
(CvtBoolToUint8 (ConstBool [false])) => (Const8 [0])
(CvtBoolToUint8 (ConstBool [true])) => (Const8 [1])
(Trunc16to8 (ZeroExt8to16 x)) => x
(Trunc32to8 (ZeroExt8to32 x)) => x
(Trunc32to16 (ZeroExt8to32 x)) => (ZeroExt8to16 x)
(Trunc32to16 (ZeroExt16to32 x)) => x
(Trunc64to8 (ZeroExt8to64 x)) => x
(Trunc64to16 (ZeroExt8to64 x)) => (ZeroExt8to16 x)
(Trunc64to16 (ZeroExt16to64 x)) => x
(Trunc64to32 (ZeroExt8to64 x)) => (ZeroExt8to32 x)
(Trunc64to32 (ZeroExt16to64 x)) => (ZeroExt16to32 x)
(Trunc64to32 (ZeroExt32to64 x)) => x
(Trunc16to8 (SignExt8to16 x)) => x
(Trunc32to8 (SignExt8to32 x)) => x
(Trunc32to16 (SignExt8to32 x)) => (SignExt8to16 x)
(Trunc32to16 (SignExt16to32 x)) => x
(Trunc64to8 (SignExt8to64 x)) => x
(Trunc64to16 (SignExt8to64 x)) => (SignExt8to16 x)
(Trunc64to16 (SignExt16to64 x)) => x
(Trunc64to32 (SignExt8to64 x)) => (SignExt8to32 x)
(Trunc64to32 (SignExt16to64 x)) => (SignExt16to32 x)
(Trunc64to32 (SignExt32to64 x)) => x
(ZeroExt8to16 (Const8 [c])) => (Const16 [int16( uint8(c))])
(ZeroExt8to32 (Const8 [c])) => (Const32 [int32( uint8(c))])
(ZeroExt8to64 (Const8 [c])) => (Const64 [int64( uint8(c))])
(ZeroExt16to32 (Const16 [c])) => (Const32 [int32(uint16(c))])
(ZeroExt16to64 (Const16 [c])) => (Const64 [int64(uint16(c))])
(ZeroExt32to64 (Const32 [c])) => (Const64 [int64(uint32(c))])
(SignExt8to16 (Const8 [c])) => (Const16 [int16(c)])
(SignExt8to32 (Const8 [c])) => (Const32 [int32(c)])
(SignExt8to64 (Const8 [c])) => (Const64 [int64(c)])
(SignExt16to32 (Const16 [c])) => (Const32 [int32(c)])
(SignExt16to64 (Const16 [c])) => (Const64 [int64(c)])
(SignExt32to64 (Const32 [c])) => (Const64 [int64(c)])
(Neg8 (Const8 [c])) => (Const8 [-c])
(Neg16 (Const16 [c])) => (Const16 [-c])
(Neg32 (Const32 [c])) => (Const32 [-c])
(Neg64 (Const64 [c])) => (Const64 [-c])
(Neg32F (Const32F [c])) && c != 0 => (Const32F [-c])
(Neg64F (Const64F [c])) && c != 0 => (Const64F [-c])
(Add8 (Const8 [c]) (Const8 [d])) => (Const8 [c+d])
(Add16 (Const16 [c]) (Const16 [d])) => (Const16 [c+d])
(Add32 (Const32 [c]) (Const32 [d])) => (Const32 [c+d])
(Add64 (Const64 [c]) (Const64 [d])) => (Const64 [c+d])
(Add32F (Const32F [c]) (Const32F [d])) && c+d == c+d => (Const32F [c+d])
(Add64F (Const64F [c]) (Const64F [d])) && c+d == c+d => (Const64F [c+d])
(AddPtr <t> x (Const64 [c])) => (OffPtr <t> x [c])
(AddPtr <t> x (Const32 [c])) => (OffPtr <t> x [int64(c)])
(Sub8 (Const8 [c]) (Const8 [d])) => (Const8 [c-d])
(Sub16 (Const16 [c]) (Const16 [d])) => (Const16 [c-d])
(Sub32 (Const32 [c]) (Const32 [d])) => (Const32 [c-d])
(Sub64 (Const64 [c]) (Const64 [d])) => (Const64 [c-d])
(Sub32F (Const32F [c]) (Const32F [d])) && c-d == c-d => (Const32F [c-d])
(Sub64F (Const64F [c]) (Const64F [d])) && c-d == c-d => (Const64F [c-d])
(Mul8 (Const8 [c]) (Const8 [d])) => (Const8 [c*d])
(Mul16 (Const16 [c]) (Const16 [d])) => (Const16 [c*d])
(Mul32 (Const32 [c]) (Const32 [d])) => (Const32 [c*d])
(Mul64 (Const64 [c]) (Const64 [d])) => (Const64 [c*d])
(Mul32F (Const32F [c]) (Const32F [d])) && c*d == c*d => (Const32F [c*d])
(Mul64F (Const64F [c]) (Const64F [d])) && c*d == c*d => (Const64F [c*d])
(And8 (Const8 [c]) (Const8 [d])) => (Const8 [c&d])
(And16 (Const16 [c]) (Const16 [d])) => (Const16 [c&d])
(And32 (Const32 [c]) (Const32 [d])) => (Const32 [c&d])
(And64 (Const64 [c]) (Const64 [d])) => (Const64 [c&d])
(Or8 (Const8 [c]) (Const8 [d])) => (Const8 [c|d])
(Or16 (Const16 [c]) (Const16 [d])) => (Const16 [c|d])
(Or32 (Const32 [c]) (Const32 [d])) => (Const32 [c|d])
(Or64 (Const64 [c]) (Const64 [d])) => (Const64 [c|d])
(Xor8 (Const8 [c]) (Const8 [d])) => (Const8 [c^d])
(Xor16 (Const16 [c]) (Const16 [d])) => (Const16 [c^d])
(Xor32 (Const32 [c]) (Const32 [d])) => (Const32 [c^d])
(Xor64 (Const64 [c]) (Const64 [d])) => (Const64 [c^d])
(Ctz64 (Const64 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz64(c))])
(Ctz32 (Const32 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz32(c))])
(Ctz16 (Const16 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz16(c))])
(Ctz8 (Const8 [c])) && config.PtrSize == 4 => (Const32 [int32(ntz8(c))])
(Ctz64 (Const64 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz64(c))])
(Ctz32 (Const32 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz32(c))])
(Ctz16 (Const16 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz16(c))])
(Ctz8 (Const8 [c])) && config.PtrSize == 8 => (Const64 [int64(ntz8(c))])
(Div8 (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [c/d])
(Div16 (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [c/d])
(Div32 (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [c/d])
(Div64 (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [c/d])
(Div8u (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [int8(uint8(c)/uint8(d))])
(Div16u (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [int16(uint16(c)/uint16(d))])
(Div32u (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [int32(uint32(c)/uint32(d))])
(Div64u (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [int64(uint64(c)/uint64(d))])
(Div32F (Const32F [c]) (Const32F [d])) && c/d == c/d => (Const32F [c/d])
(Div64F (Const64F [c]) (Const64F [d])) && c/d == c/d => (Const64F [c/d])
(Select0 (Div128u (Const64 [0]) lo y)) => (Div64u lo y)
(Select1 (Div128u (Const64 [0]) lo y)) => (Mod64u lo y)
2017-02-13 16:00:09 -08:00
2020-04-11 10:32:21 -07:00
(Not (ConstBool [c])) => (ConstBool [!c])
2018-01-22 10:06:10 -08:00
2022-06-08 23:27:05 +02:00
(Floor (Const64F [c])) => (Const64F [math.Floor(c)])
(Ceil (Const64F [c])) => (Const64F [math.Ceil(c)])
(Trunc (Const64F [c])) => (Const64F [math.Trunc(c)])
(RoundToEven (Const64F [c])) => (Const64F [math.RoundToEven(c)])
2017-02-13 16:00:09 -08:00
// Convert x * 1 to x.
2020-04-11 10:32:21 -07:00
(Mul(8|16|32|64) (Const(8|16|32|64) [1]) x) => x
2022-07-27 09:56:38 -07:00
(Select0 (Mul(32|64)uover (Const(32|64) [1]) x)) => x
(Select1 (Mul(32|64)uover (Const(32|64) [1]) x)) => (ConstBool [false])
2017-02-13 16:00:09 -08:00
// Convert x * -1 to -x.
2020-04-11 10:32:21 -07:00
(Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) => (Neg(8|16|32|64) x)
2016-07-05 10:03:02 -07:00
2017-02-04 21:20:23 -08:00
// Convert multiplication by a power of two to a shift.
2020-04-11 10:32:21 -07:00
(Mul8 <t> n (Const8 [c])) && isPowerOfTwo8(c) => (Lsh8x64 <t> n (Const64 <typ.UInt64> [log8(c)]))
(Mul16 <t> n (Const16 [c])) && isPowerOfTwo16(c) => (Lsh16x64 <t> n (Const64 <typ.UInt64> [log16(c)]))
(Mul32 <t> n (Const32 [c])) && isPowerOfTwo32(c) => (Lsh32x64 <t> n (Const64 <typ.UInt64> [log32(c)]))
(Mul64 <t> n (Const64 [c])) && isPowerOfTwo64(c) => (Lsh64x64 <t> n (Const64 <typ.UInt64> [log64(c)]))
(Mul8 <t> n (Const8 [c])) && t.IsSigned() && isPowerOfTwo8(-c) => (Neg8 (Lsh8x64 <t> n (Const64 <typ.UInt64> [log8(-c)])))
(Mul16 <t> n (Const16 [c])) && t.IsSigned() && isPowerOfTwo16(-c) => (Neg16 (Lsh16x64 <t> n (Const64 <typ.UInt64> [log16(-c)])))
(Mul32 <t> n (Const32 [c])) && t.IsSigned() && isPowerOfTwo32(-c) => (Neg32 (Lsh32x64 <t> n (Const64 <typ.UInt64> [log32(-c)])))
(Mul64 <t> n (Const64 [c])) && t.IsSigned() && isPowerOfTwo64(-c) => (Neg64 (Lsh64x64 <t> n (Const64 <typ.UInt64> [log64(-c)])))
2017-02-04 21:20:23 -08:00
2020-04-11 10:32:21 -07:00
(Mod8 (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [c % d])
(Mod16 (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [c % d])
(Mod32 (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [c % d])
(Mod64 (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [c % d])
2016-04-04 16:13:35 +02:00
2020-04-11 10:32:21 -07:00
(Mod8u (Const8 [c]) (Const8 [d])) && d != 0 => (Const8 [int8(uint8(c) % uint8(d))])
(Mod16u (Const16 [c]) (Const16 [d])) && d != 0 => (Const16 [int16(uint16(c) % uint16(d))])
(Mod32u (Const32 [c]) (Const32 [d])) && d != 0 => (Const32 [int32(uint32(c) % uint32(d))])
(Mod64u (Const64 [c]) (Const64 [d])) && d != 0 => (Const64 [int64(uint64(c) % uint64(d))])
2016-04-04 16:13:35 +02:00
2020-04-11 10:32:21 -07:00
(Lsh64x64 (Const64 [c]) (Const64 [d])) => (Const64 [c << uint64(d)])
(Rsh64x64 (Const64 [c]) (Const64 [d])) => (Const64 [c >> uint64(d)])
(Rsh64Ux64 (Const64 [c]) (Const64 [d])) => (Const64 [int64(uint64(c) >> uint64(d))])
(Lsh32x64 (Const32 [c]) (Const64 [d])) => (Const32 [c << uint64(d)])
(Rsh32x64 (Const32 [c]) (Const64 [d])) => (Const32 [c >> uint64(d)])
(Rsh32Ux64 (Const32 [c]) (Const64 [d])) => (Const32 [int32(uint32(c) >> uint64(d))])
(Lsh16x64 (Const16 [c]) (Const64 [d])) => (Const16 [c << uint64(d)])
(Rsh16x64 (Const16 [c]) (Const64 [d])) => (Const16 [c >> uint64(d)])
(Rsh16Ux64 (Const16 [c]) (Const64 [d])) => (Const16 [int16(uint16(c) >> uint64(d))])
(Lsh8x64 (Const8 [c]) (Const64 [d])) => (Const8 [c << uint64(d)])
(Rsh8x64 (Const8 [c]) (Const64 [d])) => (Const8 [c >> uint64(d)])
(Rsh8Ux64 (Const8 [c]) (Const64 [d])) => (Const8 [int8(uint8(c) >> uint64(d))])
2016-02-03 06:21:24 -05:00
2016-04-24 21:21:07 +02:00
// Fold IsInBounds when the range of the index cannot exceed the limit.
2020-04-11 10:32:21 -07:00
(IsInBounds (ZeroExt8to32 _) (Const32 [c])) && (1 << 8) <= c => (ConstBool [true])
(IsInBounds (ZeroExt8to64 _) (Const64 [c])) && (1 << 8) <= c => (ConstBool [true])
(IsInBounds (ZeroExt16to32 _) (Const32 [c])) && (1 << 16) <= c => (ConstBool [true])
(IsInBounds (ZeroExt16to64 _) (Const64 [c])) && (1 << 16) <= c => (ConstBool [true])
(IsInBounds x x) => (ConstBool [false])
(IsInBounds (And8 (Const8 [c]) _) (Const8 [d])) && 0 <= c && c < d => (ConstBool [true])
(IsInBounds (ZeroExt8to16 (And8 (Const8 [c]) _)) (Const16 [d])) && 0 <= c && int16(c) < d => (ConstBool [true])
(IsInBounds (ZeroExt8to32 (And8 (Const8 [c]) _)) (Const32 [d])) && 0 <= c && int32(c) < d => (ConstBool [true])
(IsInBounds (ZeroExt8to64 (And8 (Const8 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true])
(IsInBounds (And16 (Const16 [c]) _) (Const16 [d])) && 0 <= c && c < d => (ConstBool [true])
(IsInBounds (ZeroExt16to32 (And16 (Const16 [c]) _)) (Const32 [d])) && 0 <= c && int32(c) < d => (ConstBool [true])
(IsInBounds (ZeroExt16to64 (And16 (Const16 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true])
(IsInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c < d => (ConstBool [true])
(IsInBounds (ZeroExt32to64 (And32 (Const32 [c]) _)) (Const64 [d])) && 0 <= c && int64(c) < d => (ConstBool [true])
(IsInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c < d => (ConstBool [true])
(IsInBounds (Const32 [c]) (Const32 [d])) => (ConstBool [0 <= c && c < d])
(IsInBounds (Const64 [c]) (Const64 [d])) => (ConstBool [0 <= c && c < d])
2016-04-04 16:38:26 +02:00
// (Mod64u x y) is always between 0 (inclusive) and y (exclusive).
2020-04-11 10:32:21 -07:00
(IsInBounds (Mod32u _ y) y) => (ConstBool [true])
(IsInBounds (Mod64u _ y) y) => (ConstBool [true])
2018-05-28 13:49:40 -07:00
// Right shifting an unsigned number limits its value.
2020-04-11 10:32:21 -07:00
(IsInBounds (ZeroExt8to64 (Rsh8Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true])
(IsInBounds (ZeroExt8to32 (Rsh8Ux64 _ (Const64 [c]))) (Const32 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true])
(IsInBounds (ZeroExt8to16 (Rsh8Ux64 _ (Const64 [c]))) (Const16 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true])
(IsInBounds (Rsh8Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 8 && 1<<uint( 8-c)-1 < d => (ConstBool [true])
(IsInBounds (ZeroExt16to64 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true])
(IsInBounds (ZeroExt16to32 (Rsh16Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true])
(IsInBounds (Rsh16Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 16 && 1<<uint(16-c)-1 < d => (ConstBool [true])
(IsInBounds (ZeroExt32to64 (Rsh32Ux64 _ (Const64 [c]))) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d => (ConstBool [true])
(IsInBounds (Rsh32Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 32 && 1<<uint(32-c)-1 < d => (ConstBool [true])
(IsInBounds (Rsh64Ux64 _ (Const64 [c])) (Const64 [d])) && 0 < c && c < 64 && 1<<uint(64-c)-1 < d => (ConstBool [true])
2016-04-04 16:38:26 +02:00
2020-04-11 10:32:21 -07:00
(IsSliceInBounds x x) => (ConstBool [true])
(IsSliceInBounds (And32 (Const32 [c]) _) (Const32 [d])) && 0 <= c && c <= d => (ConstBool [true])
(IsSliceInBounds (And64 (Const64 [c]) _) (Const64 [d])) && 0 <= c && c <= d => (ConstBool [true])
(IsSliceInBounds (Const32 [0]) _) => (ConstBool [true])
(IsSliceInBounds (Const64 [0]) _) => (ConstBool [true])
(IsSliceInBounds (Const32 [c]) (Const32 [d])) => (ConstBool [0 <= c && c <= d])
(IsSliceInBounds (Const64 [c]) (Const64 [d])) => (ConstBool [0 <= c && c <= d])
(IsSliceInBounds (SliceLen x) (SliceCap x)) => (ConstBool [true])
2016-02-17 12:17:11 +01:00
2020-04-11 10:32:21 -07:00
(Eq(64|32|16|8) x x) => (ConstBool [true])
(EqB (ConstBool [c]) (ConstBool [d])) => (ConstBool [c == d])
(EqB (ConstBool [false]) x) => (Not x)
(EqB (ConstBool [true]) x) => x
2016-02-17 12:17:11 +01:00
2020-04-11 10:32:21 -07:00
(Neq(64|32|16|8) x x) => (ConstBool [false])
(NeqB (ConstBool [c]) (ConstBool [d])) => (ConstBool [c != d])
(NeqB (ConstBool [false]) x) => x
(NeqB (ConstBool [true]) x) => (Not x)
(NeqB (Not x) (Not y)) => (NeqB x y)
2015-03-23 17:02:11 -07:00
2020-04-24 16:26:50 -07:00
(Eq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Eq64 (Const64 <t> [c-d]) x)
(Eq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Eq32 (Const32 <t> [c-d]) x)
(Eq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Eq16 (Const16 <t> [c-d]) x)
(Eq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Eq8 (Const8 <t> [c-d]) x)
2016-02-03 19:43:46 +01:00
2020-04-24 16:26:50 -07:00
(Neq64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Neq64 (Const64 <t> [c-d]) x)
(Neq32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Neq32 (Const32 <t> [c-d]) x)
(Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Neq16 (Const16 <t> [c-d]) x)
(Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Neq8 (Const8 <t> [c-d]) x)
2016-02-03 19:43:46 +01:00
2019-05-20 11:55:56 -07:00
// signed integer range: ( c <= x && x (<|<=) d ) -> ( unsigned(x-c) (<|<=) unsigned(d-c) )
2020-04-18 01:30:04 +07:00
(AndB (Leq64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c]))
(AndB (Leq32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [d-c]))
(AndB (Leq16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [d-c]))
(AndB (Leq8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [d-c]))
2019-05-20 11:55:56 -07:00
// signed integer range: ( c < x && x (<|<=) d ) -> ( unsigned(x-(c+1)) (<|<=) unsigned(d-(c+1)) )
2020-04-18 01:30:04 +07:00
(AndB (Less64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1]))
(AndB (Less32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c+1])) (Const32 <x.Type> [d-c-1]))
(AndB (Less16 (Const16 [c]) x) ((Less|Leq)16 x (Const16 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c+1])) (Const16 <x.Type> [d-c-1]))
(AndB (Less8 (Const8 [c]) x) ((Less|Leq)8 x (Const8 [d]))) && d >= c+1 && c+1 > c => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c+1])) (Const8 <x.Type> [d-c-1]))
2019-05-20 11:55:56 -07:00
// unsigned integer range: ( c <= x && x (<|<=) d ) -> ( x-c (<|<=) d-c )
2020-04-18 01:30:04 +07:00
(AndB (Leq64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c) => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c]))
(AndB (Leq32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c) => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [d-c]))
(AndB (Leq16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c) => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c])) (Const16 <x.Type> [d-c]))
(AndB (Leq8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= uint8(c) => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c])) (Const8 <x.Type> [d-c]))
2019-05-20 11:55:56 -07:00
// unsigned integer range: ( c < x && x (<|<=) d ) -> ( x-(c+1) (<|<=) d-(c+1) )
2020-04-18 01:30:04 +07:00
(AndB (Less64U (Const64 [c]) x) ((Less|Leq)64U x (Const64 [d]))) && uint64(d) >= uint64(c+1) && uint64(c+1) > uint64(c) => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c+1])) (Const64 <x.Type> [d-c-1]))
(AndB (Less32U (Const32 [c]) x) ((Less|Leq)32U x (Const32 [d]))) && uint32(d) >= uint32(c+1) && uint32(c+1) > uint32(c) => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c+1])) (Const32 <x.Type> [d-c-1]))
(AndB (Less16U (Const16 [c]) x) ((Less|Leq)16U x (Const16 [d]))) && uint16(d) >= uint16(c+1) && uint16(c+1) > uint16(c) => ((Less|Leq)16U (Sub16 <x.Type> x (Const16 <x.Type> [c+1])) (Const16 <x.Type> [d-c-1]))
(AndB (Less8U (Const8 [c]) x) ((Less|Leq)8U x (Const8 [d]))) && uint8(d) >= uint8(c+1) && uint8(c+1) > uint8(c) => ((Less|Leq)8U (Sub8 <x.Type> x (Const8 <x.Type> [c+1])) (Const8 <x.Type> [d-c-1]))
2019-05-20 11:55:56 -07:00
// signed integer range: ( c (<|<=) x || x < d ) -> ( unsigned(c-d) (<|<=) unsigned(x-d) )
2020-04-18 01:30:04 +07:00
(OrB ((Less|Leq)64 (Const64 [c]) x) (Less64 x (Const64 [d]))) && c >= d => ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
(OrB ((Less|Leq)32 (Const32 [c]) x) (Less32 x (Const32 [d]))) && c >= d => ((Less|Leq)32U (Const32 <x.Type> [c-d]) (Sub32 <x.Type> x (Const32 <x.Type> [d])))
(OrB ((Less|Leq)16 (Const16 [c]) x) (Less16 x (Const16 [d]))) && c >= d => ((Less|Leq)16U (Const16 <x.Type> [c-d]) (Sub16 <x.Type> x (Const16 <x.Type> [d])))
(OrB ((Less|Leq)8 (Const8 [c]) x) (Less8 x (Const8 [d]))) && c >= d => ((Less|Leq)8U (Const8 <x.Type> [c-d]) (Sub8 <x.Type> x (Const8 <x.Type> [d])))
2019-05-20 11:55:56 -07:00
// signed integer range: ( c (<|<=) x || x <= d ) -> ( unsigned(c-(d+1)) (<|<=) unsigned(x-(d+1)) )
2020-04-18 01:30:04 +07:00
(OrB ((Less|Leq)64 (Const64 [c]) x) (Leq64 x (Const64 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1])))
(OrB ((Less|Leq)32 (Const32 [c]) x) (Leq32 x (Const32 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)32U (Const32 <x.Type> [c-d-1]) (Sub32 <x.Type> x (Const32 <x.Type> [d+1])))
(OrB ((Less|Leq)16 (Const16 [c]) x) (Leq16 x (Const16 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
(OrB ((Less|Leq)8 (Const8 [c]) x) (Leq8 x (Const8 [d]))) && c >= d+1 && d+1 > d => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
2019-05-20 11:55:56 -07:00
// unsigned integer range: ( c (<|<=) x || x < d ) -> ( c-d (<|<=) x-d )
2020-04-18 01:30:04 +07:00
(OrB ((Less|Leq)64U (Const64 [c]) x) (Less64U x (Const64 [d]))) && uint64(c) >= uint64(d) => ((Less|Leq)64U (Const64 <x.Type> [c-d]) (Sub64 <x.Type> x (Const64 <x.Type> [d])))
(OrB ((Less|Leq)32U (Const32 [c]) x) (Less32U x (Const32 [d]))) && uint32(c) >= uint32(d) => ((Less|Leq)32U (Const32 <x.Type> [c-d]) (Sub32 <x.Type> x (Const32 <x.Type> [d])))
(OrB ((Less|Leq)16U (Const16 [c]) x) (Less16U x (Const16 [d]))) && uint16(c) >= uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d]) (Sub16 <x.Type> x (Const16 <x.Type> [d])))
(OrB ((Less|Leq)8U (Const8 [c]) x) (Less8U x (Const8 [d]))) && uint8(c) >= uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d]) (Sub8 <x.Type> x (Const8 <x.Type> [d])))
2019-05-20 11:55:56 -07:00
// unsigned integer range: ( c (<|<=) x || x <= d ) -> ( c-(d+1) (<|<=) x-(d+1) )
2020-04-18 01:30:04 +07:00
(OrB ((Less|Leq)64U (Const64 [c]) x) (Leq64U x (Const64 [d]))) && uint64(c) >= uint64(d+1) && uint64(d+1) > uint64(d) => ((Less|Leq)64U (Const64 <x.Type> [c-d-1]) (Sub64 <x.Type> x (Const64 <x.Type> [d+1])))
(OrB ((Less|Leq)32U (Const32 [c]) x) (Leq32U x (Const32 [d]))) && uint32(c) >= uint32(d+1) && uint32(d+1) > uint32(d) => ((Less|Leq)32U (Const32 <x.Type> [c-d-1]) (Sub32 <x.Type> x (Const32 <x.Type> [d+1])))
(OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
(OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
2019-05-20 11:55:56 -07:00
2017-03-30 03:30:22 +00:00
// Canonicalize x-const to x+(-const)
2020-04-24 16:26:50 -07:00
(Sub64 x (Const64 <t> [c])) && x.Op != OpConst64 => (Add64 (Const64 <t> [-c]) x)
(Sub32 x (Const32 <t> [c])) && x.Op != OpConst32 => (Add32 (Const32 <t> [-c]) x)
(Sub16 x (Const16 <t> [c])) && x.Op != OpConst16 => (Add16 (Const16 <t> [-c]) x)
(Sub8 x (Const8 <t> [c])) && x.Op != OpConst8 => (Add8 (Const8 <t> [-c]) x)
2016-02-03 19:43:46 +01:00
2016-08-31 12:37:19 +02:00
// fold negation into comparison operators
2020-04-24 16:26:50 -07:00
(Not (Eq(64|32|16|8|B|Ptr|64F|32F) x y)) => (Neq(64|32|16|8|B|Ptr|64F|32F) x y)
(Not (Neq(64|32|16|8|B|Ptr|64F|32F) x y)) => (Eq(64|32|16|8|B|Ptr|64F|32F) x y)
2018-02-20 20:18:16 +01:00
2020-04-24 16:26:50 -07:00
(Not (Less(64|32|16|8) x y)) => (Leq(64|32|16|8) y x)
(Not (Less(64|32|16|8)U x y)) => (Leq(64|32|16|8)U y x)
(Not (Leq(64|32|16|8) x y)) => (Less(64|32|16|8) y x)
(Not (Leq(64|32|16|8)U x y)) => (Less(64|32|16|8)U y x)
2018-02-20 20:18:16 +01:00
2016-03-01 13:39:47 +01:00
// Distribute multiplication c * (d+x) -> c*d + c*x. Useful for:
// a[i].b = ...; a[i+1].b = ...
2020-04-24 16:26:50 -07:00
(Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x)) =>
2016-04-26 12:08:31 -07:00
(Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
2020-04-24 16:26:50 -07:00
(Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x)) =>
(Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x))
2016-03-01 13:39:47 +01:00
2018-03-24 12:08:00 +01:00
// Rewrite x*y ± x*z to x*(y±z)
(Add(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z))
2020-04-24 16:26:50 -07:00
=> (Mul(64|32|16|8) x (Add(64|32|16|8) <t> y z))
2018-03-24 12:08:00 +01:00
(Sub(64|32|16|8) <t> (Mul(64|32|16|8) x y) (Mul(64|32|16|8) x z))
2020-04-24 16:26:50 -07:00
=> (Mul(64|32|16|8) x (Sub(64|32|16|8) <t> y z))
2017-08-17 15:06:42 +08:00
2016-02-03 06:21:24 -05:00
// rewrite shifts of 8/16/32 bit consts into 64 bit consts to reduce
// the number of the other rewrite rules for const shifts
2020-04-20 03:26:53 +07:00
(Lsh64x32 <t> x (Const32 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh64x16 <t> x (Const16 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh64x8 <t> x (Const8 [c])) => (Lsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64x32 <t> x (Const32 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64x16 <t> x (Const16 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64x8 <t> x (Const8 [c])) => (Rsh64x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh64Ux32 <t> x (Const32 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh64Ux16 <t> x (Const16 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh64Ux8 <t> x (Const8 [c])) => (Rsh64Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh32x32 <t> x (Const32 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh32x16 <t> x (Const16 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh32x8 <t> x (Const8 [c])) => (Lsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32x32 <t> x (Const32 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32x16 <t> x (Const16 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32x8 <t> x (Const8 [c])) => (Rsh32x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh32Ux32 <t> x (Const32 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh32Ux16 <t> x (Const16 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh32Ux8 <t> x (Const8 [c])) => (Rsh32Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh16x32 <t> x (Const32 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh16x16 <t> x (Const16 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh16x8 <t> x (Const8 [c])) => (Lsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16x32 <t> x (Const32 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16x16 <t> x (Const16 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16x8 <t> x (Const8 [c])) => (Rsh16x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh16Ux32 <t> x (Const32 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh16Ux16 <t> x (Const16 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh16Ux8 <t> x (Const8 [c])) => (Rsh16Ux64 x (Const64 <t> [int64(uint8(c))]))
(Lsh8x32 <t> x (Const32 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Lsh8x16 <t> x (Const16 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Lsh8x8 <t> x (Const8 [c])) => (Lsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8x32 <t> x (Const32 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8x16 <t> x (Const16 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8x8 <t> x (Const8 [c])) => (Rsh8x64 x (Const64 <t> [int64(uint8(c))]))
(Rsh8Ux32 <t> x (Const32 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint32(c))]))
(Rsh8Ux16 <t> x (Const16 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint16(c))]))
(Rsh8Ux8 <t> x (Const8 [c])) => (Rsh8Ux64 x (Const64 <t> [int64(uint8(c))]))
2016-02-03 06:21:24 -05:00
// shifts by zero
2020-04-20 03:26:53 +07:00
(Lsh(64|32|16|8)x64 x (Const64 [0])) => x
(Rsh(64|32|16|8)x64 x (Const64 [0])) => x
(Rsh(64|32|16|8)Ux64 x (Const64 [0])) => x
2018-03-24 12:08:00 +01:00
2019-03-09 21:58:16 -07:00
// rotates by multiples of register width
2020-04-20 03:26:53 +07:00
(RotateLeft64 x (Const64 [c])) && c%64 == 0 => x
(RotateLeft32 x (Const32 [c])) && c%32 == 0 => x
(RotateLeft16 x (Const16 [c])) && c%16 == 0 => x
(RotateLeft8 x (Const8 [c])) && c%8 == 0 => x
2019-03-09 21:58:16 -07:00
2018-03-24 12:08:00 +01:00
// zero shifted
2020-04-20 03:26:53 +07:00
(Lsh64x(64|32|16|8) (Const64 [0]) _) => (Const64 [0])
(Rsh64x(64|32|16|8) (Const64 [0]) _) => (Const64 [0])
(Rsh64Ux(64|32|16|8) (Const64 [0]) _) => (Const64 [0])
(Lsh32x(64|32|16|8) (Const32 [0]) _) => (Const32 [0])
(Rsh32x(64|32|16|8) (Const32 [0]) _) => (Const32 [0])
(Rsh32Ux(64|32|16|8) (Const32 [0]) _) => (Const32 [0])
(Lsh16x(64|32|16|8) (Const16 [0]) _) => (Const16 [0])
(Rsh16x(64|32|16|8) (Const16 [0]) _) => (Const16 [0])
(Rsh16Ux(64|32|16|8) (Const16 [0]) _) => (Const16 [0])
(Lsh8x(64|32|16|8) (Const8 [0]) _) => (Const8 [0])
(Rsh8x(64|32|16|8) (Const8 [0]) _) => (Const8 [0])
(Rsh8Ux(64|32|16|8) (Const8 [0]) _) => (Const8 [0])
2016-02-09 19:13:43 +01:00
2016-02-03 06:21:24 -05:00
// large left shifts of all values, and right shifts of unsigned values
2020-04-20 03:26:53 +07:00
((Lsh64|Rsh64U)x64 _ (Const64 [c])) && uint64(c) >= 64 => (Const64 [0])
((Lsh32|Rsh32U)x64 _ (Const64 [c])) && uint64(c) >= 32 => (Const32 [0])
((Lsh16|Rsh16U)x64 _ (Const64 [c])) && uint64(c) >= 16 => (Const16 [0])
((Lsh8|Rsh8U)x64 _ (Const64 [c])) && uint64(c) >= 8 => (Const8 [0])
2016-02-03 06:21:24 -05:00
// combine const shifts
2020-04-20 03:26:53 +07:00
(Lsh64x64 <t> (Lsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh64x64 x (Const64 <t> [c+d]))
(Lsh32x64 <t> (Lsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh32x64 x (Const64 <t> [c+d]))
(Lsh16x64 <t> (Lsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh16x64 x (Const64 <t> [c+d]))
(Lsh8x64 <t> (Lsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Lsh8x64 x (Const64 <t> [c+d]))
2016-02-03 06:21:24 -05:00
2020-04-20 03:26:53 +07:00
(Rsh64x64 <t> (Rsh64x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh64x64 x (Const64 <t> [c+d]))
(Rsh32x64 <t> (Rsh32x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh32x64 x (Const64 <t> [c+d]))
(Rsh16x64 <t> (Rsh16x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh16x64 x (Const64 <t> [c+d]))
(Rsh8x64 <t> (Rsh8x64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh8x64 x (Const64 <t> [c+d]))
2016-02-03 06:21:24 -05:00
2020-04-20 03:26:53 +07:00
(Rsh64Ux64 <t> (Rsh64Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh64Ux64 x (Const64 <t> [c+d]))
(Rsh32Ux64 <t> (Rsh32Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh32Ux64 x (Const64 <t> [c+d]))
(Rsh16Ux64 <t> (Rsh16Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh16Ux64 x (Const64 <t> [c+d]))
(Rsh8Ux64 <t> (Rsh8Ux64 x (Const64 [c])) (Const64 [d])) && !uaddOvf(c,d) => (Rsh8Ux64 x (Const64 <t> [c+d]))
2016-02-03 06:21:24 -05:00
2018-06-02 20:55:20 +02:00
// Remove signed right shift before an unsigned right shift that extracts the sign bit.
2020-04-20 03:26:53 +07:00
(Rsh8Ux64 (Rsh8x64 x _) (Const64 <t> [7] )) => (Rsh8Ux64 x (Const64 <t> [7] ))
(Rsh16Ux64 (Rsh16x64 x _) (Const64 <t> [15])) => (Rsh16Ux64 x (Const64 <t> [15]))
(Rsh32Ux64 (Rsh32x64 x _) (Const64 <t> [31])) => (Rsh32Ux64 x (Const64 <t> [31]))
(Rsh64Ux64 (Rsh64x64 x _) (Const64 <t> [63])) => (Rsh64Ux64 x (Const64 <t> [63]))
2018-06-02 20:55:20 +02:00
2022-08-17 13:09:12 -07:00
// Convert x>>c<<c to x&^(1<<c-1)
(Lsh64x64 i:(Rsh(64|64U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 64 && i.Uses == 1 => (And64 x (Const64 <v.Type> [int64(-1) << c]))
(Lsh32x64 i:(Rsh(32|32U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 32 && i.Uses == 1 => (And32 x (Const32 <v.Type> [int32(-1) << c]))
(Lsh16x64 i:(Rsh(16|16U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 16 && i.Uses == 1 => (And16 x (Const16 <v.Type> [int16(-1) << c]))
(Lsh8x64 i:(Rsh(8|8U)x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 8 && i.Uses == 1 => (And8 x (Const8 <v.Type> [int8(-1) << c]))
// similarly for x<<c>>c
(Rsh64Ux64 i:(Lsh64x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 64 && i.Uses == 1 => (And64 x (Const64 <v.Type> [int64(^uint64(0)>>c)]))
(Rsh32Ux64 i:(Lsh32x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 32 && i.Uses == 1 => (And32 x (Const32 <v.Type> [int32(^uint32(0)>>c)]))
(Rsh16Ux64 i:(Lsh16x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 16 && i.Uses == 1 => (And16 x (Const16 <v.Type> [int16(^uint16(0)>>c)]))
(Rsh8Ux64 i:(Lsh8x64 x (Const64 [c])) (Const64 [c])) && c >= 0 && c < 8 && i.Uses == 1 => (And8 x (Const8 <v.Type> [int8 (^uint8 (0)>>c)]))
2016-04-26 12:08:31 -07:00
// ((x >> c1) << c2) >> c3
2018-01-07 13:23:59 -08:00
(Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3]))
2016-05-28 21:15:24 -07:00
&& uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3)
2020-04-20 03:26:53 +07:00
=> (Rsh(64|32|16|8)Ux64 x (Const64 <typ.UInt64> [c1-c2+c3]))
2016-04-26 12:08:31 -07:00
// ((x << c1) >> c2) << c3
2018-01-07 13:23:59 -08:00
(Lsh(64|32|16|8)x64 (Rsh(64|32|16|8)Ux64 (Lsh(64|32|16|8)x64 x (Const64 [c1])) (Const64 [c2])) (Const64 [c3]))
2016-05-28 21:15:24 -07:00
&& uint64(c1) >= uint64(c2) && uint64(c3) >= uint64(c2) && !uaddOvf(c1-c2, c3)
2020-04-20 03:26:53 +07:00
=> (Lsh(64|32|16|8)x64 x (Const64 <typ.UInt64> [c1-c2+c3]))
2016-04-26 12:08:31 -07:00
2018-02-18 12:58:11 +01:00
// (x >> c) & uppermask = 0
2020-04-20 03:26:53 +07:00
(And64 (Const64 [m]) (Rsh64Ux64 _ (Const64 [c]))) && c >= int64(64-ntz64(m)) => (Const64 [0])
2020-04-21 03:47:02 +07:00
(And32 (Const32 [m]) (Rsh32Ux64 _ (Const64 [c]))) && c >= int64(32-ntz32(m)) => (Const32 [0])
(And16 (Const16 [m]) (Rsh16Ux64 _ (Const64 [c]))) && c >= int64(16-ntz16(m)) => (Const16 [0])
(And8 (Const8 [m]) (Rsh8Ux64 _ (Const64 [c]))) && c >= int64(8-ntz8(m)) => (Const8 [0])
2018-02-18 12:58:11 +01:00
// (x << c) & lowermask = 0
2020-04-20 03:26:53 +07:00
(And64 (Const64 [m]) (Lsh64x64 _ (Const64 [c]))) && c >= int64(64-nlz64(m)) => (Const64 [0])
(And32 (Const32 [m]) (Lsh32x64 _ (Const64 [c]))) && c >= int64(32-nlz32(m)) => (Const32 [0])
(And16 (Const16 [m]) (Lsh16x64 _ (Const64 [c]))) && c >= int64(16-nlz16(m)) => (Const16 [0])
(And8 (Const8 [m]) (Lsh8x64 _ (Const64 [c]))) && c >= int64(8-nlz8(m)) => (Const8 [0])
2018-02-18 12:58:11 +01:00
2017-02-20 15:54:43 -05:00
// replace shifts with zero extensions
2020-04-20 03:26:53 +07:00
(Rsh16Ux64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) => (ZeroExt8to16 (Trunc16to8 <typ.UInt8> x))
(Rsh32Ux64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) => (ZeroExt8to32 (Trunc32to8 <typ.UInt8> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) => (ZeroExt8to64 (Trunc64to8 <typ.UInt8> x))
(Rsh32Ux64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) => (ZeroExt16to32 (Trunc32to16 <typ.UInt16> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) => (ZeroExt16to64 (Trunc64to16 <typ.UInt16> x))
(Rsh64Ux64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) => (ZeroExt32to64 (Trunc64to32 <typ.UInt32> x))
2017-02-20 15:54:43 -05:00
// replace shifts with sign extensions
2020-04-20 03:26:53 +07:00
(Rsh16x64 (Lsh16x64 x (Const64 [8])) (Const64 [8])) => (SignExt8to16 (Trunc16to8 <typ.Int8> x))
(Rsh32x64 (Lsh32x64 x (Const64 [24])) (Const64 [24])) => (SignExt8to32 (Trunc32to8 <typ.Int8> x))
(Rsh64x64 (Lsh64x64 x (Const64 [56])) (Const64 [56])) => (SignExt8to64 (Trunc64to8 <typ.Int8> x))
(Rsh32x64 (Lsh32x64 x (Const64 [16])) (Const64 [16])) => (SignExt16to32 (Trunc32to16 <typ.Int16> x))
(Rsh64x64 (Lsh64x64 x (Const64 [48])) (Const64 [48])) => (SignExt16to64 (Trunc64to16 <typ.Int16> x))
(Rsh64x64 (Lsh64x64 x (Const64 [32])) (Const64 [32])) => (SignExt32to64 (Trunc64to32 <typ.Int32> x))
2017-02-20 15:54:43 -05:00
2015-10-27 17:46:53 -05:00
// constant comparisons
2020-04-24 16:26:50 -07:00
(Eq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c == d])
(Neq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c != d])
(Less(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c < d])
(Leq(64|32|16|8) (Const(64|32|16|8) [c]) (Const(64|32|16|8) [d])) => (ConstBool [c <= d])
(Less64U (Const64 [c]) (Const64 [d])) => (ConstBool [uint64(c) < uint64(d)])
(Less32U (Const32 [c]) (Const32 [d])) => (ConstBool [uint32(c) < uint32(d)])
(Less16U (Const16 [c]) (Const16 [d])) => (ConstBool [uint16(c) < uint16(d)])
(Less8U (Const8 [c]) (Const8 [d])) => (ConstBool [ uint8(c) < uint8(d)])
(Leq64U (Const64 [c]) (Const64 [d])) => (ConstBool [uint64(c) <= uint64(d)])
(Leq32U (Const32 [c]) (Const32 [d])) => (ConstBool [uint32(c) <= uint32(d)])
(Leq16U (Const16 [c]) (Const16 [d])) => (ConstBool [uint16(c) <= uint16(d)])
(Leq8U (Const8 [c]) (Const8 [d])) => (ConstBool [ uint8(c) <= uint8(d)])
(Leq8 (Const8 [0]) (And8 _ (Const8 [c]))) && c >= 0 => (ConstBool [true])
(Leq16 (Const16 [0]) (And16 _ (Const16 [c]))) && c >= 0 => (ConstBool [true])
(Leq32 (Const32 [0]) (And32 _ (Const32 [c]))) && c >= 0 => (ConstBool [true])
(Leq64 (Const64 [0]) (And64 _ (Const64 [c]))) && c >= 0 => (ConstBool [true])
(Leq8 (Const8 [0]) (Rsh8Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true])
(Leq16 (Const16 [0]) (Rsh16Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true])
(Leq32 (Const32 [0]) (Rsh32Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true])
(Leq64 (Const64 [0]) (Rsh64Ux64 _ (Const64 [c]))) && c > 0 => (ConstBool [true])
2020-02-20 17:46:08 +00:00
2021-08-30 22:26:54 +08:00
(Less(64|32|16|8) (Const(64|32|16|8) <t> [0]) x) && isNonNegative(x) => (Neq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x)
(Less(64|32|16|8) x (Const(64|32|16|8) <t> [1])) && isNonNegative(x) => (Eq(64|32|16|8) (Const(64|32|16|8) <t> [0]) x)
2017-09-14 11:04:37 +01:00
// constant floating point comparisons
2020-04-24 16:26:50 -07:00
(Eq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c == d])
(Eq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c == d])
(Neq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c != d])
(Neq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c != d])
(Less32F (Const32F [c]) (Const32F [d])) => (ConstBool [c < d])
(Less64F (Const64F [c]) (Const64F [d])) => (ConstBool [c < d])
(Leq32F (Const32F [c]) (Const32F [d])) => (ConstBool [c <= d])
(Leq64F (Const64F [c]) (Const64F [d])) => (ConstBool [c <= d])
2017-09-14 11:04:37 +01:00
2015-08-14 12:59:33 +02:00
// simplifications
2020-04-24 16:26:50 -07:00
(Or(64|32|16|8) x x) => x
2022-04-18 17:46:43 +00:00
(Or(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
2020-04-24 16:26:50 -07:00
(Or(64|32|16|8) (Const(64|32|16|8) [-1]) _) => (Const(64|32|16|8) [-1])
2022-04-18 17:46:43 +00:00
(Or(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
2018-02-22 14:32:57 +01:00
2020-04-24 16:26:50 -07:00
(And(64|32|16|8) x x) => x
(And(64|32|16|8) (Const(64|32|16|8) [-1]) x) => x
2022-04-18 17:46:43 +00:00
(And(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0])
(And(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [0])
2018-02-22 14:32:57 +01:00
2020-04-24 16:26:50 -07:00
(Xor(64|32|16|8) x x) => (Const(64|32|16|8) [0])
(Xor(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
2022-04-18 17:46:43 +00:00
(Xor(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
2018-02-22 14:32:57 +01:00
2020-04-24 16:26:50 -07:00
(Add(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
(Sub(64|32|16|8) x x) => (Const(64|32|16|8) [0])
(Mul(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0])
2022-07-27 09:56:38 -07:00
(Select0 (Mul(64|32)uover (Const(64|32) [0]) x)) => (Const(64|32) [0])
(Select1 (Mul(64|32)uover (Const(64|32) [0]) x)) => (ConstBool [false])
2018-02-22 14:32:57 +01:00
2020-04-24 16:26:50 -07:00
(Com(64|32|16|8) (Com(64|32|16|8) x)) => x
(Com(64|32|16|8) (Const(64|32|16|8) [c])) => (Const(64|32|16|8) [^c])
2018-02-22 14:32:57 +01:00
2020-04-24 16:26:50 -07:00
(Neg(64|32|16|8) (Sub(64|32|16|8) x y)) => (Sub(64|32|16|8) y x)
2022-04-18 17:46:43 +00:00
(Add(64|32|16|8) x (Neg(64|32|16|8) y)) => (Sub(64|32|16|8) x y)
(Xor(64|32|16|8) (Const(64|32|16|8) [-1]) x) => (Com(64|32|16|8) x)
(Sub(64|32|16|8) (Neg(64|32|16|8) x) (Com(64|32|16|8) x)) => (Const(64|32|16|8) [1])
(Sub(64|32|16|8) (Com(64|32|16|8) x) (Neg(64|32|16|8) x)) => (Const(64|32|16|8) [-1])
(Add(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
2018-02-22 14:32:57 +01:00
2019-05-04 16:56:08 -07:00
// ^(x-1) == ^x+1 == -x
2020-04-24 16:26:50 -07:00
(Add(64|32|16|8) (Const(64|32|16|8) [1]) (Com(64|32|16|8) x)) => (Neg(64|32|16|8) x)
(Com(64|32|16|8) (Add(64|32|16|8) (Const(64|32|16|8) [-1]) x)) => (Neg(64|32|16|8) x)
2019-05-04 16:56:08 -07:00
// -(-x) == x
2020-04-24 16:26:50 -07:00
(Neg(64|32|16|8) (Neg(64|32|16|8) x)) => x
2019-05-04 16:56:08 -07:00
// -^x == x+1
2020-04-24 16:26:50 -07:00
(Neg(64|32|16|8) <t> (Com(64|32|16|8) x)) => (Add(64|32|16|8) (Const(64|32|16|8) <t> [1]) x)
2018-03-24 12:08:00 +01:00
2020-04-24 16:26:50 -07:00
(And(64|32|16|8) x (And(64|32|16|8) x y)) => (And(64|32|16|8) x y)
(Or(64|32|16|8) x (Or(64|32|16|8) x y)) => (Or(64|32|16|8) x y)
(Xor(64|32|16|8) x (Xor(64|32|16|8) x y)) => y
2016-04-18 09:28:50 -07:00
2020-07-10 11:39:23 +08:00
// Unsigned comparisons to zero.
(Less(64U|32U|16U|8U) _ (Const(64|32|16|8) [0])) => (ConstBool [false])
(Leq(64U|32U|16U|8U) (Const(64|32|16|8) [0]) _) => (ConstBool [true])
cmd/compile: add minor bit twiddling optimizations
Noticed while adding to the bitset implementation
in cmd/compile/internal/gc.
The (Com (Const)) optimizations were already present
in the AMD64 lowered optimizations.
They trigger 118, 44, 262, and 108 times
respectively for int sizes 8, 16, 32, and 64
in a run of make.bash.
The (Or (And)) optimization is new.
It triggers 3 times for int size 8
and once for int size 64 during make.bash,
in packages internal/poll, reflect,
encoding/asn1, and go/types,
so there is a bit of natural test coverage.
Change-Id: I44072864ff88831d5ec7dce37c516d29df056e98
Reviewed-on: https://go-review.googlesource.com/41758
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-25 15:55:52 -07:00
// Ands clear bits. Ors set bits.
// If a subsequent Or will set all the bits
// that an And cleared, we can skip the And.
// This happens in bitmasking code like:
// x &^= 3 << shift // clear two old bits
// x |= v << shift // set two new bits
// when shift is a small constant and v ends up a constant 3.
2020-04-24 16:26:50 -07:00
(Or8 (And8 x (Const8 [c2])) (Const8 <t> [c1])) && ^(c1 | c2) == 0 => (Or8 (Const8 <t> [c1]) x)
(Or16 (And16 x (Const16 [c2])) (Const16 <t> [c1])) && ^(c1 | c2) == 0 => (Or16 (Const16 <t> [c1]) x)
(Or32 (And32 x (Const32 [c2])) (Const32 <t> [c1])) && ^(c1 | c2) == 0 => (Or32 (Const32 <t> [c1]) x)
(Or64 (And64 x (Const64 [c2])) (Const64 <t> [c1])) && ^(c1 | c2) == 0 => (Or64 (Const64 <t> [c1]) x)
(Trunc64to8 (And64 (Const64 [y]) x)) && y&0xFF == 0xFF => (Trunc64to8 x)
(Trunc64to16 (And64 (Const64 [y]) x)) && y&0xFFFF == 0xFFFF => (Trunc64to16 x)
(Trunc64to32 (And64 (Const64 [y]) x)) && y&0xFFFFFFFF == 0xFFFFFFFF => (Trunc64to32 x)
(Trunc32to8 (And32 (Const32 [y]) x)) && y&0xFF == 0xFF => (Trunc32to8 x)
(Trunc32to16 (And32 (Const32 [y]) x)) && y&0xFFFF == 0xFFFF => (Trunc32to16 x)
(Trunc16to8 (And16 (Const16 [y]) x)) && y&0xFF == 0xFF => (Trunc16to8 x)
(ZeroExt8to64 (Trunc64to8 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 56 => x
(ZeroExt16to64 (Trunc64to16 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 48 => x
(ZeroExt32to64 (Trunc64to32 x:(Rsh64Ux64 _ (Const64 [s])))) && s >= 32 => x
(ZeroExt8to32 (Trunc32to8 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 24 => x
(ZeroExt16to32 (Trunc32to16 x:(Rsh32Ux64 _ (Const64 [s])))) && s >= 16 => x
(ZeroExt8to16 (Trunc16to8 x:(Rsh16Ux64 _ (Const64 [s])))) && s >= 8 => x
(SignExt8to64 (Trunc64to8 x:(Rsh64x64 _ (Const64 [s])))) && s >= 56 => x
(SignExt16to64 (Trunc64to16 x:(Rsh64x64 _ (Const64 [s])))) && s >= 48 => x
(SignExt32to64 (Trunc64to32 x:(Rsh64x64 _ (Const64 [s])))) && s >= 32 => x
(SignExt8to32 (Trunc32to8 x:(Rsh32x64 _ (Const64 [s])))) && s >= 24 => x
(SignExt16to32 (Trunc32to16 x:(Rsh32x64 _ (Const64 [s])))) && s >= 16 => x
(SignExt8to16 (Trunc16to8 x:(Rsh16x64 _ (Const64 [s])))) && s >= 8 => x
(Slicemask (Const32 [x])) && x > 0 => (Const32 [-1])
(Slicemask (Const32 [0])) => (Const32 [0])
(Slicemask (Const64 [x])) && x > 0 => (Const64 [-1])
(Slicemask (Const64 [0])) => (Const64 [0])
2016-10-25 15:49:52 -07:00
2015-11-09 20:54:34 -08:00
// simplifications often used for lengths. e.g. len(s[i:i+5])==5
2020-04-24 16:26:50 -07:00
(Sub(64|32|16|8) (Add(64|32|16|8) x y) x) => y
(Sub(64|32|16|8) (Add(64|32|16|8) x y) y) => x
2021-08-25 12:36:17 +02:00
(Sub(64|32|16|8) (Sub(64|32|16|8) x y) x) => (Neg(64|32|16|8) y)
(Sub(64|32|16|8) x (Add(64|32|16|8) x y)) => (Neg(64|32|16|8) y)
(Add(64|32|16|8) x (Sub(64|32|16|8) y x)) => y
(Add(64|32|16|8) x (Add(64|32|16|8) y (Sub(64|32|16|8) z x))) => (Add(64|32|16|8) y z)
2015-11-09 20:54:34 -08:00
2016-02-09 19:46:26 +01:00
// basic phi simplifications
2020-04-24 16:26:50 -07:00
(Phi (Const8 [c]) (Const8 [c])) => (Const8 [c])
(Phi (Const16 [c]) (Const16 [c])) => (Const16 [c])
(Phi (Const32 [c]) (Const32 [c])) => (Const32 [c])
(Phi (Const64 [c]) (Const64 [c])) => (Const64 [c])
2016-02-09 19:46:26 +01:00
2015-07-27 13:17:45 -07:00
// slice and interface comparisons
2015-09-10 13:53:27 -07:00
// The frontend ensures that we can only compare against nil,
// so we need only compare the first word (interface type or slice ptr).
2020-04-24 16:26:50 -07:00
(EqInter x y) => (EqPtr (ITab x) (ITab y))
(NeqInter x y) => (NeqPtr (ITab x) (ITab y))
(EqSlice x y) => (EqPtr (SlicePtr x) (SlicePtr y))
(NeqSlice x y) => (NeqPtr (SlicePtr x) (SlicePtr y))
2015-07-27 13:17:45 -07:00
2016-02-13 17:37:19 -06:00
// Load of store of same address, with compatibly typed value and same size
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 x _))
&& isSamePtr(p1, p2)
&& t1.Compare(x.Type) == types.CMPeq
2020-04-24 16:26:50 -07:00
&& t1.Size() == t2.Size()
=> x
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 x _)))
&& isSamePtr(p1, p3)
&& t1.Compare(x.Type) == types.CMPeq
2020-04-24 16:26:50 -07:00
&& t1.Size() == t2.Size()
&& disjoint(p3, t3.Size(), p2, t2.Size())
=> x
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 x _))))
&& isSamePtr(p1, p4)
&& t1.Compare(x.Type) == types.CMPeq
2020-04-24 16:26:50 -07:00
&& t1.Size() == t2.Size()
&& disjoint(p4, t4.Size(), p2, t2.Size())
&& disjoint(p4, t4.Size(), p3, t3.Size())
=> x
2018-04-11 22:47:24 +01:00
(Load <t1> p1 (Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 x _)))))
&& isSamePtr(p1, p5)
&& t1.Compare(x.Type) == types.CMPeq
2020-04-24 16:26:50 -07:00
&& t1.Size() == t2.Size()
&& disjoint(p5, t5.Size(), p2, t2.Size())
&& disjoint(p5, t5.Size(), p3, t3.Size())
&& disjoint(p5, t5.Size(), p4, t4.Size())
=> x
2016-02-13 17:37:19 -06:00
2017-09-08 01:31:13 +01:00
// Pass constants through math.Float{32,64}bits and math.Float{32,64}frombits
2020-04-29 15:15:37 -07:00
(Load <t1> p1 (Store {t2} p2 (Const64 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitFloat(t1) && !math.IsNaN(math.Float64frombits(uint64(x))) => (Const64F [math.Float64frombits(uint64(x))])
(Load <t1> p1 (Store {t2} p2 (Const32 [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitFloat(t1) && !math.IsNaN(float64(math.Float32frombits(uint32(x)))) => (Const32F [math.Float32frombits(uint32(x))])
(Load <t1> p1 (Store {t2} p2 (Const64F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 8 && is64BitInt(t1) => (Const64 [int64(math.Float64bits(x))])
(Load <t1> p1 (Store {t2} p2 (Const32F [x]) _)) && isSamePtr(p1,p2) && sizeof(t2) == 4 && is32BitInt(t1) => (Const32 [int32(math.Float32bits(x))])
2018-04-11 22:47:24 +01:00
// Float Loads up to Zeros so they can be constant folded.
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
mem:(Zero [n] p3 _)))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p3)
2018-05-08 16:30:48 +01:00
&& fe.CanSSA(t1)
2020-04-24 16:26:50 -07:00
&& disjoint(op, t1.Size(), p2, t2.Size())
=> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p3) mem)
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
mem:(Zero [n] p4 _))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p4)
2018-05-08 16:30:48 +01:00
&& fe.CanSSA(t1)
2020-04-24 16:26:50 -07:00
&& disjoint(op, t1.Size(), p2, t2.Size())
&& disjoint(op, t1.Size(), p3, t3.Size())
=> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p4) mem)
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
(Store {t4} p4 _
mem:(Zero [n] p5 _)))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p5)
2018-05-08 16:30:48 +01:00
&& fe.CanSSA(t1)
2020-04-24 16:26:50 -07:00
&& disjoint(op, t1.Size(), p2, t2.Size())
&& disjoint(op, t1.Size(), p3, t3.Size())
&& disjoint(op, t1.Size(), p4, t4.Size())
=> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p5) mem)
2018-04-11 22:47:24 +01:00
(Load <t1> op:(OffPtr [o1] p1)
(Store {t2} p2 _
(Store {t3} p3 _
(Store {t4} p4 _
(Store {t5} p5 _
mem:(Zero [n] p6 _))))))
&& o1 >= 0 && o1+t1.Size() <= n && isSamePtr(p1, p6)
2018-05-08 16:30:48 +01:00
&& fe.CanSSA(t1)
2020-04-24 16:26:50 -07:00
&& disjoint(op, t1.Size(), p2, t2.Size())
&& disjoint(op, t1.Size(), p3, t3.Size())
&& disjoint(op, t1.Size(), p4, t4.Size())
&& disjoint(op, t1.Size(), p5, t5.Size())
=> @mem.Block (Load <t1> (OffPtr <op.Type> [o1] p6) mem)
2018-04-11 22:47:24 +01:00
// Zero to Load forwarding.
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& t1.IsBoolean()
&& isSamePtr(p1, p2)
&& n >= o + 1
2020-04-24 16:26:50 -07:00
=> (ConstBool [false])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is8BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 1
2020-04-24 16:26:50 -07:00
=> (Const8 [0])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is16BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 2
2020-04-24 16:26:50 -07:00
=> (Const16 [0])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is32BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 4
2020-04-24 16:26:50 -07:00
=> (Const32 [0])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is64BitInt(t1)
&& isSamePtr(p1, p2)
&& n >= o + 8
2020-04-24 16:26:50 -07:00
=> (Const64 [0])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is32BitFloat(t1)
&& isSamePtr(p1, p2)
&& n >= o + 4
2020-04-24 16:26:50 -07:00
=> (Const32F [0])
2018-04-11 22:47:24 +01:00
(Load <t1> (OffPtr [o] p1) (Zero [n] p2 _))
&& is64BitFloat(t1)
&& isSamePtr(p1, p2)
&& n >= o + 8
2020-04-24 16:26:50 -07:00
=> (Const64F [0])
2017-09-08 01:31:13 +01:00
cmd/compile: add generic rules to eliminate some unnecessary stores
Eliminates stores of values that have just been loaded from the same
location. Handles the common case where there are up to 3 intermediate
stores to non-overlapping struct fields.
For example the loads and stores of x.a, x.b and x.d in the following
function are now removed:
type T struct {
a, b, c, d int
}
func f(x *T) {
y := *x
y.c += 8
*x = y
}
Before this CL (s390x):
TEXT "".f(SB)
MOVD "".x(R15), R5
MOVD (R5), R1
MOVD 8(R5), R2
MOVD 16(R5), R0
MOVD 24(R5), R4
ADD $8, R0, R3
STMG R1, R4, (R5)
RET
After this CL (s390x):
TEXT "".f(SB)
MOVD "".x(R15), R1
MOVD 16(R1), R0
ADD $8, R0, R0
MOVD R0, 16(R1)
RET
In total these rules are triggered ~5091 times during all.bash,
which is broken down as:
Intermediate stores | Triggered
--------------------+----------
0 | 1434
1 | 2508
2 | 888
3 | 261
--------------------+----------
Change-Id: Ia4721ae40146aceec1fdd3e65b0e9283770bfba5
Reviewed-on: https://go-review.googlesource.com/38793
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-03-29 16:37:12 -04:00
// Eliminate stores of values that have just been loaded from the same location.
2018-04-11 22:47:24 +01:00
// We also handle the common case where there are some intermediate stores.
(Store {t1} p1 (Load <t2> p2 mem) mem)
&& isSamePtr(p1, p2)
2020-04-24 16:26:50 -07:00
&& t2.Size() == t1.Size()
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ oldmem))
&& isSamePtr(p1, p2)
2020-04-24 16:26:50 -07:00
&& t2.Size() == t1.Size()
&& disjoint(p1, t1.Size(), p3, t3.Size())
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ oldmem)))
&& isSamePtr(p1, p2)
2020-04-24 16:26:50 -07:00
&& t2.Size() == t1.Size()
&& disjoint(p1, t1.Size(), p3, t3.Size())
&& disjoint(p1, t1.Size(), p4, t4.Size())
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} p1 (Load <t2> p2 oldmem) mem:(Store {t3} p3 _ (Store {t4} p4 _ (Store {t5} p5 _ oldmem))))
&& isSamePtr(p1, p2)
2020-04-24 16:26:50 -07:00
&& t2.Size() == t1.Size()
&& disjoint(p1, t1.Size(), p3, t3.Size())
&& disjoint(p1, t1.Size(), p4, t4.Size())
&& disjoint(p1, t1.Size(), p5, t5.Size())
=> mem
2018-04-11 22:47:24 +01:00
// Don't Store zeros to cleared variables.
(Store {t} (OffPtr [o] p1) x mem:(Zero [n] p2 _))
&& isConstZero(x)
2020-04-24 16:26:50 -07:00
&& o >= 0 && t.Size() + o <= n && isSamePtr(p1, p2)
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Zero [n] p3 _)))
&& isConstZero(x)
2020-04-24 16:26:50 -07:00
&& o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p3)
&& disjoint(op, t1.Size(), p2, t2.Size())
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Zero [n] p4 _))))
&& isConstZero(x)
2020-04-24 16:26:50 -07:00
&& o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p4)
&& disjoint(op, t1.Size(), p2, t2.Size())
&& disjoint(op, t1.Size(), p3, t3.Size())
=> mem
2018-04-11 22:47:24 +01:00
(Store {t1} op:(OffPtr [o1] p1) x mem:(Store {t2} p2 _ (Store {t3} p3 _ (Store {t4} p4 _ (Zero [n] p5 _)))))
&& isConstZero(x)
2020-04-24 16:26:50 -07:00
&& o1 >= 0 && t1.Size() + o1 <= n && isSamePtr(p1, p5)
&& disjoint(op, t1.Size(), p2, t2.Size())
&& disjoint(op, t1.Size(), p3, t3.Size())
&& disjoint(op, t1.Size(), p4, t4.Size())
=> mem
cmd/compile: add generic rules to eliminate some unnecessary stores
Eliminates stores of values that have just been loaded from the same
location. Handles the common case where there are up to 3 intermediate
stores to non-overlapping struct fields.
For example the loads and stores of x.a, x.b and x.d in the following
function are now removed:
type T struct {
a, b, c, d int
}
func f(x *T) {
y := *x
y.c += 8
*x = y
}
Before this CL (s390x):
TEXT "".f(SB)
MOVD "".x(R15), R5
MOVD (R5), R1
MOVD 8(R5), R2
MOVD 16(R5), R0
MOVD 24(R5), R4
ADD $8, R0, R3
STMG R1, R4, (R5)
RET
After this CL (s390x):
TEXT "".f(SB)
MOVD "".x(R15), R1
MOVD 16(R1), R0
ADD $8, R0, R0
MOVD R0, 16(R1)
RET
In total these rules are triggered ~5091 times during all.bash,
which is broken down as:
Intermediate stores | Triggered
--------------------+----------
0 | 1434
1 | 2508
2 | 888
3 | 261
--------------------+----------
Change-Id: Ia4721ae40146aceec1fdd3e65b0e9283770bfba5
Reviewed-on: https://go-review.googlesource.com/38793
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-03-29 16:37:12 -04:00
2016-03-04 14:12:44 -08:00
// Collapse OffPtr
2021-03-22 11:35:02 +01:00
(OffPtr (OffPtr p [y]) [x]) => (OffPtr p [x+y])
2020-04-24 16:26:50 -07:00
(OffPtr p [0]) && v.Type.Compare(p.Type) == types.CMPeq => p
2016-03-04 14:12:44 -08:00
2015-05-18 16:44:20 -07:00
// indexing operations
2015-04-15 15:51:25 -07:00
// Note: bounds check has already been done
2020-04-24 16:26:50 -07:00
(PtrIndex <t> ptr idx) && config.PtrSize == 4 && is32Bit(t.Elem().Size()) => (AddPtr ptr (Mul32 <typ.Int> idx (Const32 <typ.Int> [int32(t.Elem().Size())])))
(PtrIndex <t> ptr idx) && config.PtrSize == 8 => (AddPtr ptr (Mul64 <typ.Int> idx (Const64 <typ.Int> [t.Elem().Size()])))
2016-01-11 21:05:33 -08:00
// struct operations
2020-04-24 16:26:50 -07:00
(StructSelect (StructMake1 x)) => x
(StructSelect [0] (StructMake2 x _)) => x
(StructSelect [1] (StructMake2 _ x)) => x
(StructSelect [0] (StructMake3 x _ _)) => x
(StructSelect [1] (StructMake3 _ x _)) => x
(StructSelect [2] (StructMake3 _ _ x)) => x
(StructSelect [0] (StructMake4 x _ _ _)) => x
(StructSelect [1] (StructMake4 _ x _ _)) => x
(StructSelect [2] (StructMake4 _ _ x _)) => x
(StructSelect [3] (StructMake4 _ _ _ x)) => x
(Load <t> _ _) && t.IsStruct() && t.NumFields() == 0 && fe.CanSSA(t) =>
2016-01-11 21:05:33 -08:00
(StructMake0)
2020-04-24 16:26:50 -07:00
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 1 && fe.CanSSA(t) =>
2016-01-11 21:05:33 -08:00
(StructMake1
2017-03-04 16:17:12 -08:00
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem))
2020-04-24 16:26:50 -07:00
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 2 && fe.CanSSA(t) =>
2016-01-11 21:05:33 -08:00
(StructMake2
2017-03-04 16:17:12 -08:00
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
2016-01-11 21:05:33 -08:00
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem))
2020-04-24 16:26:50 -07:00
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 3 && fe.CanSSA(t) =>
2016-01-11 21:05:33 -08:00
(StructMake3
2017-03-04 16:17:12 -08:00
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
2016-01-11 21:05:33 -08:00
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem))
2020-04-24 16:26:50 -07:00
(Load <t> ptr mem) && t.IsStruct() && t.NumFields() == 4 && fe.CanSSA(t) =>
2016-01-11 21:05:33 -08:00
(StructMake4
2017-03-04 16:17:12 -08:00
(Load <t.FieldType(0)> (OffPtr <t.FieldType(0).PtrTo()> [0] ptr) mem)
2016-01-11 21:05:33 -08:00
(Load <t.FieldType(1)> (OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] ptr) mem)
(Load <t.FieldType(2)> (OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] ptr) mem)
(Load <t.FieldType(3)> (OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] ptr) mem))
2020-04-24 16:26:50 -07:00
(StructSelect [i] x:(Load <t> ptr mem)) && !fe.CanSSA(t) =>
2016-03-21 16:18:45 -07:00
@x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
2016-01-11 21:05:33 -08:00
2020-04-24 16:26:50 -07:00
(Store _ (StructMake0) mem) => mem
(Store dst (StructMake1 <t> f0) mem) =>
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(0)} (OffPtr <t.FieldType(0).PtrTo()> [0] dst) f0 mem)
2020-04-24 16:26:50 -07:00
(Store dst (StructMake2 <t> f0 f1) mem) =>
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(1)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(0)}
2017-03-09 13:03:07 -05:00
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
2017-03-04 16:17:12 -08:00
f0 mem))
2020-04-24 16:26:50 -07:00
(Store dst (StructMake3 <t> f0 f1 f2) mem) =>
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(2)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(1)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(0)}
2017-03-04 16:17:12 -08:00
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
f0 mem)))
2020-04-24 16:26:50 -07:00
(Store dst (StructMake4 <t> f0 f1 f2 f3) mem) =>
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(3)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(3).PtrTo()> [t.FieldOff(3)] dst)
f3
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(2)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(2).PtrTo()> [t.FieldOff(2)] dst)
f2
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(1)}
2016-01-11 21:05:33 -08:00
(OffPtr <t.FieldType(1).PtrTo()> [t.FieldOff(1)] dst)
f1
2017-03-13 21:51:08 -04:00
(Store {t.FieldType(0)}
2017-03-04 16:17:12 -08:00
(OffPtr <t.FieldType(0).PtrTo()> [0] dst)
f0 mem))))
2015-05-18 16:44:20 -07:00
2016-10-28 11:37:45 -07:00
// Putting struct{*byte} and similar into direct interfaces.
2021-03-22 11:35:02 +01:00
(IMake _typ (StructMake1 val)) => (IMake _typ val)
2020-04-24 16:26:50 -07:00
(StructSelect [0] (IData x)) => (IData x)
2016-10-30 21:10:03 -07:00
cmd/compile: better job of naming compound types
Compound AUTO types weren't named previously. That was because live
variable analysis (plive.go) doesn't handle spilling to compound types.
It can't handle them because there is no valid place to put VARDEFs when
regalloc is spilling compound types.
compound types = multiword builtin types: complex, string, slice, and
interface.
Instead, we split named AUTOs into individual one-word variables. For
example, a string s gets split into a byte ptr s.ptr and an integer
s.len. Those two variables can be spilled to / restored from
independently. As a result, live variable analysis can handle them
because they are one-word objects.
This CL will change how AUTOs are described in DWARF information.
Consider the code:
func f(s string, i int) int {
x := s[i:i+5]
g()
return lookup(x)
}
The old compiler would spill x to two consecutive slots on the stack,
both named x (at offsets 0 and 8). The new compiler spills the pointer
of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a
constant (5) and can be rematerialized for the call to lookup.
So compound objects may not be spilled in their entirety, and even if
they are they won't necessarily be contiguous. Such is the price of
optimization.
Re-enable live variable analysis tests. One test remains disabled, it
fails because of #14904.
Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801
Reviewed-on: https://go-review.googlesource.com/21233
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// un-SSAable values use mem->mem copies
2020-04-24 16:26:50 -07:00
(Store {t} dst (Load src mem) mem) && !fe.CanSSA(t) =>
(Move {t} [t.Size()] dst src mem)
(Store {t} dst (Load src mem) (VarDef {x} mem)) && !fe.CanSSA(t) =>
(Move {t} [t.Size()] dst src (VarDef {x} mem))
2015-08-28 14:24:10 -04:00
2016-10-30 21:10:03 -07:00
// array ops
2020-04-24 16:26:50 -07:00
(ArraySelect (ArrayMake1 x)) => x
2016-10-30 21:10:03 -07:00
2020-04-24 16:26:50 -07:00
(Load <t> _ _) && t.IsArray() && t.NumElem() == 0 =>
2016-10-30 21:10:03 -07:00
(ArrayMake0)
2020-04-24 16:26:50 -07:00
(Load <t> ptr mem) && t.IsArray() && t.NumElem() == 1 && fe.CanSSA(t) =>
2018-04-24 13:39:51 -07:00
(ArrayMake1 (Load <t.Elem()> ptr mem))
2016-10-30 21:10:03 -07:00
2020-04-24 16:26:50 -07:00
(Store _ (ArrayMake0) mem) => mem
(Store dst (ArrayMake1 e) mem) => (Store {e.Type} dst e mem)
2016-10-30 21:10:03 -07:00
2019-10-29 13:14:58 -07:00
// Putting [1]*byte and similar into direct interfaces.
2021-03-22 11:35:02 +01:00
(IMake _typ (ArrayMake1 val)) => (IMake _typ val)
2020-04-24 16:26:50 -07:00
(ArraySelect [0] (IData x)) => (IData x)
2016-10-30 21:10:03 -07:00
2015-05-27 14:52:22 -07:00
// string ops
2016-03-14 19:11:19 +01:00
// Decomposing StringMake and lowering of StringPtr and StringLen
// happens in a later pass, dec, so that these operations are available
cmd/compile: better job of naming compound types
Compound AUTO types weren't named previously. That was because live
variable analysis (plive.go) doesn't handle spilling to compound types.
It can't handle them because there is no valid place to put VARDEFs when
regalloc is spilling compound types.
compound types = multiword builtin types: complex, string, slice, and
interface.
Instead, we split named AUTOs into individual one-word variables. For
example, a string s gets split into a byte ptr s.ptr and an integer
s.len. Those two variables can be spilled to / restored from
independently. As a result, live variable analysis can handle them
because they are one-word objects.
This CL will change how AUTOs are described in DWARF information.
Consider the code:
func f(s string, i int) int {
x := s[i:i+5]
g()
return lookup(x)
}
The old compiler would spill x to two consecutive slots on the stack,
both named x (at offsets 0 and 8). The new compiler spills the pointer
of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a
constant (5) and can be rematerialized for the call to lookup.
So compound objects may not be spilled in their entirety, and even if
they are they won't necessarily be contiguous. Such is the price of
optimization.
Re-enable live variable analysis tests. One test remains disabled, it
fails because of #14904.
Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801
Reviewed-on: https://go-review.googlesource.com/21233
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// to other passes for optimizations.
2020-04-24 16:26:50 -07:00
(StringPtr (StringMake (Addr <t> {s} base) _)) => (Addr <t> {s} base)
(StringLen (StringMake _ (Const64 <t> [c]))) => (Const64 <t> [c])
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(ConstString {str}) && config.PtrSize == 4 && str == "" =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(StringMake (ConstNil) (Const32 <typ.Int> [0]))
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(ConstString {str}) && config.PtrSize == 8 && str == "" =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(StringMake (ConstNil) (Const64 <typ.Int> [0]))
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(ConstString {str}) && config.PtrSize == 4 && str != "" =>
2015-11-02 21:28:13 -08:00
(StringMake
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(Addr <typ.BytePtr> {fe.StringData(str)}
2015-11-02 21:28:13 -08:00
(SB))
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(Const32 <typ.Int> [int32(len(str))]))
(ConstString {str}) && config.PtrSize == 8 && str != "" =>
2015-08-18 10:26:28 -07:00
(StringMake
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(Addr <typ.BytePtr> {fe.StringData(str)}
2015-09-01 09:16:58 -07:00
(SB))
cmd/compile: start implementing strongly typed aux and auxint fields
Right now the Aux and AuxInt fields of ssa.Values are typed as
interface{} and int64, respectively. Each rule that uses these values
must cast them to the type they actually are (*obj.LSym, or int32, or
ValAndOff, etc.), use them, and then cast them back to interface{} or
int64.
We know for each opcode what the types of the Aux and AuxInt fields
should be. So let's modify the rule generator to declare the types to
be what we know they should be, autoconverting to and from the generic
types for us. That way we can make the rules more type safe.
It's difficult to make a single CL for this, so I've coopted the "=>"
token to indicate a rule that is strongly typed. "->" rules are
processed as before. That will let us migrate a few rules at a time in
separate CLs. Hopefully we can reach a state where all rules are
strongly typed and we can drop the distinction.
This CL changes just a few rules to get a feel for what this
transition would look like.
I've decided not to put explicit types in the rules. I think it
makes the rules somewhat clearer, but definitely more verbose.
In particular, the passthrough rules that don't modify the fields
in question are verbose for no real reason.
Change-Id: I63a1b789ac5702e7caf7934cd49f784235d1d73d
Reviewed-on: https://go-review.googlesource.com/c/go/+/190197
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2020-03-19 16:25:08 -07:00
(Const64 <typ.Int> [int64(len(str))]))
2015-08-18 10:26:28 -07:00
// slice ops
cmd/compile: better job of naming compound types
Compound AUTO types weren't named previously. That was because live
variable analysis (plive.go) doesn't handle spilling to compound types.
It can't handle them because there is no valid place to put VARDEFs when
regalloc is spilling compound types.
compound types = multiword builtin types: complex, string, slice, and
interface.
Instead, we split named AUTOs into individual one-word variables. For
example, a string s gets split into a byte ptr s.ptr and an integer
s.len. Those two variables can be spilled to / restored from
independently. As a result, live variable analysis can handle them
because they are one-word objects.
This CL will change how AUTOs are described in DWARF information.
Consider the code:
func f(s string, i int) int {
x := s[i:i+5]
g()
return lookup(x)
}
The old compiler would spill x to two consecutive slots on the stack,
both named x (at offsets 0 and 8). The new compiler spills the pointer
of x to a slot named x.ptr. It doesn't spill x.len at all, as it is a
constant (5) and can be rematerialized for the call to lookup.
So compound objects may not be spilled in their entirety, and even if
they are they won't necessarily be contiguous. Such is the price of
optimization.
Re-enable live variable analysis tests. One test remains disabled, it
fails because of #14904.
Change-Id: I8ef2b5ab91e43a0d2136bfc231c05d100ec0b801
Reviewed-on: https://go-review.googlesource.com/21233
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2016-03-28 11:25:17 -07:00
// Only a few slice rules are provided here. See dec.rules for
// a more comprehensive set.
2020-04-24 16:26:50 -07:00
(SliceLen (SliceMake _ (Const64 <t> [c]) _)) => (Const64 <t> [c])
(SliceCap (SliceMake _ _ (Const64 <t> [c]))) => (Const64 <t> [c])
(SliceLen (SliceMake _ (Const32 <t> [c]) _)) => (Const32 <t> [c])
(SliceCap (SliceMake _ _ (Const32 <t> [c]))) => (Const32 <t> [c])
(SlicePtr (SliceMake (SlicePtr x) _ _)) => (SlicePtr x)
(SliceLen (SliceMake _ (SliceLen x) _)) => (SliceLen x)
(SliceCap (SliceMake _ _ (SliceCap x))) => (SliceCap x)
(SliceCap (SliceMake _ _ (SliceLen x))) => (SliceLen x)
(ConstSlice) && config.PtrSize == 4 =>
2015-11-02 21:28:13 -08:00
(SliceMake
2018-04-24 13:39:51 -07:00
(ConstNil <v.Type.Elem().PtrTo()>)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const32 <typ.Int> [0])
(Const32 <typ.Int> [0]))
2020-04-24 16:26:50 -07:00
(ConstSlice) && config.PtrSize == 8 =>
2015-08-18 10:26:28 -07:00
(SliceMake
2018-04-24 13:39:51 -07:00
(ConstNil <v.Type.Elem().PtrTo()>)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.Int> [0])
(Const64 <typ.Int> [0]))
2015-08-18 10:26:28 -07:00
// interface ops
2020-04-24 16:26:50 -07:00
(ConstInterface) =>
2015-08-18 10:26:28 -07:00
(IMake
2018-02-27 13:46:03 -08:00
(ConstNil <typ.Uintptr>)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(ConstNil <typ.BytePtr>))
2015-06-06 16:03:33 -07:00
2020-04-24 16:26:50 -07:00
(NilCheck (GetG mem) mem) => mem
2015-08-12 11:22:16 -07:00
2020-04-24 16:26:50 -07:00
(If (Not cond) yes no) => (If cond no yes)
(If (ConstBool [c]) yes no) && c => (First yes no)
(If (ConstBool [c]) yes no) && !c => (First no yes)
2015-10-23 14:08:50 -07:00
2022-11-17 18:10:00 +08:00
(Phi <t> nx:(Not x) ny:(Not y)) && nx.Uses == 1 && ny.Uses == 1 => (Not (Phi <t> x y))
2015-10-23 14:08:50 -07:00
// Get rid of Convert ops for pointer arithmetic on unsafe.Pointer.
2020-04-24 16:26:50 -07:00
(Convert (Add(64|32) (Convert ptr mem) off) mem) => (AddPtr ptr off)
(Convert (Convert ptr mem) mem) => ptr
2015-11-02 08:10:26 -08:00
2016-02-05 20:26:18 -08:00
// strength reduction of divide by a constant.
2017-02-13 16:00:09 -08:00
// See ../magic.go for a detailed description of these algorithms.
2016-02-05 20:26:18 -08:00
2017-02-13 16:00:09 -08:00
// Unsigned divide by power of 2. Strength reduce to a shift.
2020-04-24 16:26:50 -07:00
(Div8u n (Const8 [c])) && isPowerOfTwo8(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
(Div16u n (Const16 [c])) && isPowerOfTwo16(c) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16(c)]))
(Div32u n (Const32 [c])) && isPowerOfTwo32(c) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32(c)]))
(Div64u n (Const64 [c])) && isPowerOfTwo64(c) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64(c)]))
(Div64u n (Const64 [-1<<63])) => (Rsh64Ux64 n (Const64 <typ.UInt64> [63]))
2016-02-05 20:26:18 -08:00
cmd/compile: optimize signed non-negative div/mod by a power of 2
This CL optimizes assembly for len() or cap() division
by a power of 2 constants:
func lenDiv(s []int) int {
return len(s) / 16
}
amd64 assembly before the CL:
MOVQ "".s+16(SP), AX
MOVQ AX, CX
SARQ $63, AX
SHRQ $60, AX
ADDQ CX, AX
SARQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
amd64 assembly after the CL:
MOVQ "".s+16(SP), AX
SHRQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
The CL relies on the fact that len() and cap() result cannot
be negative.
Trigger stats for the added SSA rules on linux/amd64 when running
make.bash:
46 Div64
12 Mod64
The added SSA rules may trigger on more cases in the future
when SSA values will be populated with the info on their
lower bounds.
For instance:
func f(i int16) int16 {
if i < 3 {
return -1
}
// Lower bound of i is 3 here -> i is non-negative,
// so unsigned arithmetics may be used here.
return i % 16
}
Change-Id: I8bc6be5a03e71157ced533c01416451ff6f1a7f0
Reviewed-on: https://go-review.googlesource.com/65530
Reviewed-by: Keith Randall <khr@golang.org>
2017-09-23 00:34:37 +03:00
// Signed non-negative divide by power of 2.
2020-04-24 16:26:50 -07:00
(Div8 n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo8(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
(Div16 n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo16(c) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16(c)]))
(Div32 n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo32(c) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32(c)]))
(Div64 n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo64(c) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64(c)]))
(Div64 n (Const64 [-1<<63])) && isNonNegative(n) => (Const64 [0])
cmd/compile: optimize signed non-negative div/mod by a power of 2
This CL optimizes assembly for len() or cap() division
by a power of 2 constants:
func lenDiv(s []int) int {
return len(s) / 16
}
amd64 assembly before the CL:
MOVQ "".s+16(SP), AX
MOVQ AX, CX
SARQ $63, AX
SHRQ $60, AX
ADDQ CX, AX
SARQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
amd64 assembly after the CL:
MOVQ "".s+16(SP), AX
SHRQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
The CL relies on the fact that len() and cap() result cannot
be negative.
Trigger stats for the added SSA rules on linux/amd64 when running
make.bash:
46 Div64
12 Mod64
The added SSA rules may trigger on more cases in the future
when SSA values will be populated with the info on their
lower bounds.
For instance:
func f(i int16) int16 {
if i < 3 {
return -1
}
// Lower bound of i is 3 here -> i is non-negative,
// so unsigned arithmetics may be used here.
return i % 16
}
Change-Id: I8bc6be5a03e71157ced533c01416451ff6f1a7f0
Reviewed-on: https://go-review.googlesource.com/65530
Reviewed-by: Keith Randall <khr@golang.org>
2017-09-23 00:34:37 +03:00
2016-02-05 20:26:18 -08:00
// Unsigned divide, not a power of 2. Strength reduce to a multiply.
2017-02-13 16:00:09 -08:00
// For 8-bit divides, we just do a direct 9-bit by 8-bit multiply.
2020-04-28 19:28:43 -07:00
(Div8u x (Const8 [c])) && umagicOK8(c) =>
2017-02-13 16:00:09 -08:00
(Trunc32to8
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(1<<8+umagic8(c).m)])
2017-02-13 16:00:09 -08:00
(ZeroExt8to32 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [8+umagic8(c).s])))
2017-02-13 16:00:09 -08:00
// For 16-bit divides on 64-bit machines, we do a direct 17-bit by 16-bit multiply.
2020-04-28 19:28:43 -07:00
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 8 =>
2017-02-13 16:00:09 -08:00
(Trunc64to16
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(1<<16+umagic16(c).m)])
2017-02-13 16:00:09 -08:00
(ZeroExt16to64 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [16+umagic16(c).s])))
2017-02-13 16:00:09 -08:00
// For 16-bit divides on 32-bit machines
2020-04-28 19:28:43 -07:00
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && umagic16(c).m&1 == 0 =>
2017-02-13 16:00:09 -08:00
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(1<<15+umagic16(c).m/2)])
2017-02-13 16:00:09 -08:00
(ZeroExt16to32 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [16+umagic16(c).s-1])))
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && c&1 == 0 =>
2017-02-13 16:00:09 -08:00
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(1<<15+(umagic16(c).m+1)/2)])
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [1])))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [16+umagic16(c).s-2])))
(Div16u x (Const16 [c])) && umagicOK16(c) && config.RegSize == 4 && config.useAvg =>
2017-02-13 16:00:09 -08:00
(Trunc32to16
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
2017-02-13 16:00:09 -08:00
(Avg32u
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Lsh32x64 <typ.UInt32> (ZeroExt16to32 x) (Const64 <typ.UInt64> [16]))
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(umagic16(c).m)])
2017-02-13 16:00:09 -08:00
(ZeroExt16to32 x)))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [16+umagic16(c).s-1])))
2017-02-13 16:00:09 -08:00
// For 32-bit divides on 32-bit machines
2020-04-28 19:28:43 -07:00
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && umagic32(c).m&1 == 0 && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Hmul32u <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(1<<31+umagic32(c).m/2)])
2016-02-05 20:26:18 -08:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic32(c).s-1]))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && c&1 == 0 && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
(Hmul32u <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(1<<31+(umagic32(c).m+1)/2)])
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32> x (Const64 <typ.UInt64> [1])))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic32(c).s-2]))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 4 && config.useAvg && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh32Ux64 <typ.UInt32>
2017-02-13 16:00:09 -08:00
(Avg32u
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Hmul32u <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(umagic32(c).m)])
2017-02-13 16:00:09 -08:00
x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic32(c).s-1]))
2017-02-13 16:00:09 -08:00
// For 32-bit divides on 64-bit machines
// We'll use a regular (non-hi) multiply for this case.
2020-04-28 19:28:43 -07:00
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && umagic32(c).m&1 == 0 =>
2017-02-13 16:00:09 -08:00
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(1<<31+umagic32(c).m/2)])
2017-02-13 16:00:09 -08:00
(ZeroExt32to64 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [32+umagic32(c).s-1])))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && c&1 == 0 =>
2017-02-13 16:00:09 -08:00
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(1<<31+(umagic32(c).m+1)/2)])
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [1])))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [32+umagic32(c).s-2])))
(Div32u x (Const32 [c])) && umagicOK32(c) && config.RegSize == 8 && config.useAvg =>
2017-02-13 16:00:09 -08:00
(Trunc64to32
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
2017-02-13 16:00:09 -08:00
(Avg64u
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Lsh64x64 <typ.UInt64> (ZeroExt32to64 x) (Const64 <typ.UInt64> [32]))
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt32> [int64(umagic32(c).m)])
2017-02-13 16:00:09 -08:00
(ZeroExt32to64 x)))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [32+umagic32(c).s-1])))
2017-02-13 16:00:09 -08:00
2020-10-25 11:52:29 +01:00
// For unsigned 64-bit divides on 32-bit machines,
// if the constant fits in 16 bits (so that the last term
// fits in 32 bits), convert to three 32-bit divides by a constant.
//
// If 1<<32 = Q * c + R
// and x = hi << 32 + lo
//
// Then x = (hi/c*c + hi%c) << 32 + lo
// = hi/c*c<<32 + hi%c<<32 + lo
// = hi/c*c<<32 + (hi%c)*(Q*c+R) + lo/c*c + lo%c
// = hi/c*c<<32 + (hi%c)*Q*c + lo/c*c + (hi%c*R+lo%c)
// and x / c = (hi/c)<<32 + (hi%c)*Q + lo/c + (hi%c*R+lo%c)/c
(Div64u x (Const64 [c])) && c > 0 && c <= 0xFFFF && umagicOK32(int32(c)) && config.RegSize == 4 && config.useHmul =>
(Add64
(Add64 <typ.UInt64>
(Add64 <typ.UInt64>
(Lsh64x64 <typ.UInt64>
(ZeroExt32to64
(Div32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)])))
(Const64 <typ.UInt64> [32]))
(ZeroExt32to64 (Div32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))))
(Mul64 <typ.UInt64>
(ZeroExt32to64 <typ.UInt64>
(Mod32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)])))
(Const64 <typ.UInt64> [int64((1<<32)/c)])))
(ZeroExt32to64
(Div32u <typ.UInt32>
(Add32 <typ.UInt32>
(Mod32u <typ.UInt32> (Trunc64to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(c)]))
(Mul32 <typ.UInt32>
(Mod32u <typ.UInt32>
(Trunc64to32 <typ.UInt32> (Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [32])))
(Const32 <typ.UInt32> [int32(c)]))
(Const32 <typ.UInt32> [int32((1<<32)%c)])))
(Const32 <typ.UInt32> [int32(c)]))))
2017-02-13 16:00:09 -08:00
// For 64-bit divides on 64-bit machines
// (64-bit divides on 32-bit machines are lowered to a runtime call by the walk pass.)
2020-04-28 19:28:43 -07:00
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && umagic64(c).m&1 == 0 && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Hmul64u <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(1<<63+umagic64(c).m/2)])
2016-02-05 20:26:18 -08:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic64(c).s-1]))
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && c&1 == 0 && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
(Hmul64u <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(1<<63+(umagic64(c).m+1)/2)])
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64> x (Const64 <typ.UInt64> [1])))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic64(c).s-2]))
(Div64u x (Const64 [c])) && umagicOK64(c) && config.RegSize == 8 && config.useAvg && config.useHmul =>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Rsh64Ux64 <typ.UInt64>
2017-02-13 16:00:09 -08:00
(Avg64u
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Hmul64u <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(umagic64(c).m)])
2017-02-13 16:00:09 -08:00
x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [umagic64(c).s-1]))
2017-02-13 16:00:09 -08:00
// Signed divide by a negative constant. Rewrite to divide by a positive constant.
2020-04-24 16:26:50 -07:00
(Div8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 => (Neg8 (Div8 <t> n (Const8 <t> [-c])))
(Div16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 => (Neg16 (Div16 <t> n (Const16 <t> [-c])))
(Div32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 => (Neg32 (Div32 <t> n (Const32 <t> [-c])))
(Div64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 => (Neg64 (Div64 <t> n (Const64 <t> [-c])))
2017-02-13 16:00:09 -08:00
// Dividing by the most-negative number. Result is always 0 except
// if the input is also the most-negative number.
// We can detect that using the sign bit of x & -x.
2020-04-24 16:26:50 -07:00
(Div8 <t> x (Const8 [-1<<7 ])) => (Rsh8Ux64 (And8 <t> x (Neg8 <t> x)) (Const64 <typ.UInt64> [7 ]))
(Div16 <t> x (Const16 [-1<<15])) => (Rsh16Ux64 (And16 <t> x (Neg16 <t> x)) (Const64 <typ.UInt64> [15]))
(Div32 <t> x (Const32 [-1<<31])) => (Rsh32Ux64 (And32 <t> x (Neg32 <t> x)) (Const64 <typ.UInt64> [31]))
(Div64 <t> x (Const64 [-1<<63])) => (Rsh64Ux64 (And64 <t> x (Neg64 <t> x)) (Const64 <typ.UInt64> [63]))
2017-02-13 16:00:09 -08:00
// Signed divide by power of 2.
// n / c = n >> log(c) if n >= 0
// = (n+c-1) >> log(c) if n < 0
// We conditionally add c-1 by adding n>>63>>(64-log(c)) (first shift signed, second shift unsigned).
2020-04-24 16:26:50 -07:00
(Div8 <t> n (Const8 [c])) && isPowerOfTwo8(c) =>
2017-02-13 16:00:09 -08:00
(Rsh8x64
2020-04-24 16:26:50 -07:00
(Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [int64( 8-log8(c))])))
(Const64 <typ.UInt64> [int64(log8(c))]))
(Div16 <t> n (Const16 [c])) && isPowerOfTwo16(c) =>
2017-02-13 16:00:09 -08:00
(Rsh16x64
2020-04-24 16:26:50 -07:00
(Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [int64(16-log16(c))])))
(Const64 <typ.UInt64> [int64(log16(c))]))
(Div32 <t> n (Const32 [c])) && isPowerOfTwo32(c) =>
2017-02-13 16:00:09 -08:00
(Rsh32x64
2020-04-24 16:26:50 -07:00
(Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [int64(32-log32(c))])))
(Const64 <typ.UInt64> [int64(log32(c))]))
(Div64 <t> n (Const64 [c])) && isPowerOfTwo64(c) =>
2017-02-13 16:00:09 -08:00
(Rsh64x64
2020-04-24 16:26:50 -07:00
(Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [int64(64-log64(c))])))
(Const64 <typ.UInt64> [int64(log64(c))]))
2016-02-05 20:26:18 -08:00
// Signed divide, not a power of 2. Strength reduce to a multiply.
2020-04-28 19:28:43 -07:00
(Div8 <t> x (Const8 [c])) && smagicOK8(c) =>
2017-02-13 16:00:09 -08:00
(Sub8 <t>
(Rsh32x64 <t>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(smagic8(c).m)])
2017-02-13 16:00:09 -08:00
(SignExt8to32 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [8+smagic8(c).s]))
2017-02-13 16:00:09 -08:00
(Rsh32x64 <t>
(SignExt8to32 x)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
2020-04-28 19:28:43 -07:00
(Div16 <t> x (Const16 [c])) && smagicOK16(c) =>
2017-02-13 16:00:09 -08:00
(Sub16 <t>
(Rsh32x64 <t>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(smagic16(c).m)])
2017-02-13 16:00:09 -08:00
(SignExt16to32 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [16+smagic16(c).s]))
2017-02-13 16:00:09 -08:00
(Rsh32x64 <t>
(SignExt16to32 x)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
2020-04-28 19:28:43 -07:00
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 8 =>
2017-02-13 16:00:09 -08:00
(Sub32 <t>
(Rsh64x64 <t>
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(smagic32(c).m)])
2017-02-13 16:00:09 -08:00
(SignExt32to64 x))
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [32+smagic32(c).s]))
2017-02-13 16:00:09 -08:00
(Rsh64x64 <t>
(SignExt32to64 x)
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
2020-04-28 19:28:43 -07:00
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 == 0 && config.useHmul =>
2017-02-13 16:00:09 -08:00
(Sub32 <t>
(Rsh32x64 <t>
(Hmul32 <t>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(smagic32(c).m/2)])
2017-02-13 16:00:09 -08:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [smagic32(c).s-1]))
2017-02-13 16:00:09 -08:00
(Rsh32x64 <t>
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
2020-04-28 19:28:43 -07:00
(Div32 <t> x (Const32 [c])) && smagicOK32(c) && config.RegSize == 4 && smagic32(c).m&1 != 0 && config.useHmul =>
2017-02-13 16:00:09 -08:00
(Sub32 <t>
(Rsh32x64 <t>
(Add32 <t>
(Hmul32 <t>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(smagic32(c).m)])
2017-02-13 16:00:09 -08:00
x)
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [smagic32(c).s]))
2017-02-13 16:00:09 -08:00
(Rsh32x64 <t>
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [31])))
2020-04-28 19:28:43 -07:00
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 == 0 && config.useHmul =>
2016-02-05 20:26:18 -08:00
(Sub64 <t>
(Rsh64x64 <t>
(Hmul64 <t>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(smagic64(c).m/2)])
2016-02-05 20:26:18 -08:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [smagic64(c).s-1]))
2016-02-05 20:26:18 -08:00
(Rsh64x64 <t>
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
2020-04-28 19:28:43 -07:00
(Div64 <t> x (Const64 [c])) && smagicOK64(c) && smagic64(c).m&1 != 0 && config.useHmul =>
2016-02-05 20:26:18 -08:00
(Sub64 <t>
(Rsh64x64 <t>
(Add64 <t>
(Hmul64 <t>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(smagic64(c).m)])
2016-02-05 20:26:18 -08:00
x)
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [smagic64(c).s]))
2016-02-05 20:26:18 -08:00
(Rsh64x64 <t>
x
cmd/compile: change ssa.Type into *types.Type
When package ssa was created, Type was in package gc.
To avoid circular dependencies, we used an interface (ssa.Type)
to represent type information in SSA.
In the Go 1.9 cycle, gri extricated the Type type from package gc.
As a result, we can now use it in package ssa.
Now, instead of package types depending on package ssa,
it is the other way.
This is a more sensible dependency tree,
and helps compiler performance a bit.
Though this is a big CL, most of the changes are
mechanical and uninteresting.
Interesting bits:
* Add new singleton globals to package types for the special
SSA types Memory, Void, Invalid, Flags, and Int128.
* Add two new Types, TSSA for the special types,
and TTUPLE, for SSA tuple types.
ssa.MakeTuple is now types.NewTuple.
* Move type comparison result constants CMPlt, CMPeq, and CMPgt
to package types.
* We had picked the name "types" in our rules for the handy
list of types provided by ssa.Config. That conflicted with
the types package name, so change it to "typ".
* Update the type comparison routine to handle tuples and special
types inline.
* Teach gc/fmt.go how to print special types.
* We can now eliminate ElemTypes in favor of just Elem,
and probably also some other duplicated Type methods
designed to return ssa.Type instead of *types.Type.
* The ssa tests were using their own dummy types,
and they were not particularly careful about types in general.
Of necessity, this CL switches them to use *types.Type;
it does not make them more type-accurate.
Unfortunately, using types.Type means initializing a bit
of the types universe.
This is prime for refactoring and improvement.
This shrinks ssa.Value; it now fits in a smaller size class
on 64 bit systems. This doesn't have a giant impact,
though, since most Values are preallocated in a chunk.
name old alloc/op new alloc/op delta
Template 37.9MB ± 0% 37.7MB ± 0% -0.57% (p=0.000 n=10+8)
Unicode 28.9MB ± 0% 28.7MB ± 0% -0.52% (p=0.000 n=10+10)
GoTypes 110MB ± 0% 109MB ± 0% -0.88% (p=0.000 n=10+10)
Flate 24.7MB ± 0% 24.6MB ± 0% -0.66% (p=0.000 n=10+10)
GoParser 31.1MB ± 0% 30.9MB ± 0% -0.61% (p=0.000 n=10+9)
Reflect 73.9MB ± 0% 73.4MB ± 0% -0.62% (p=0.000 n=10+8)
Tar 25.8MB ± 0% 25.6MB ± 0% -0.77% (p=0.000 n=9+10)
XML 41.2MB ± 0% 40.9MB ± 0% -0.80% (p=0.000 n=10+10)
[Geo mean] 40.5MB 40.3MB -0.68%
name old allocs/op new allocs/op delta
Template 385k ± 0% 386k ± 0% ~ (p=0.356 n=10+9)
Unicode 343k ± 1% 344k ± 0% ~ (p=0.481 n=10+10)
GoTypes 1.16M ± 0% 1.16M ± 0% -0.16% (p=0.004 n=10+10)
Flate 238k ± 1% 238k ± 1% ~ (p=0.853 n=10+10)
GoParser 320k ± 0% 320k ± 0% ~ (p=0.720 n=10+9)
Reflect 957k ± 0% 957k ± 0% ~ (p=0.460 n=10+8)
Tar 252k ± 0% 252k ± 0% ~ (p=0.133 n=9+10)
XML 400k ± 0% 400k ± 0% ~ (p=0.796 n=10+10)
[Geo mean] 428k 428k -0.01%
Removing all the interface calls helps non-trivially with CPU, though.
name old time/op new time/op delta
Template 178ms ± 4% 173ms ± 3% -2.90% (p=0.000 n=94+96)
Unicode 85.0ms ± 4% 83.9ms ± 4% -1.23% (p=0.000 n=96+96)
GoTypes 543ms ± 3% 528ms ± 3% -2.73% (p=0.000 n=98+96)
Flate 116ms ± 3% 113ms ± 4% -2.34% (p=0.000 n=96+99)
GoParser 144ms ± 3% 140ms ± 4% -2.80% (p=0.000 n=99+97)
Reflect 344ms ± 3% 334ms ± 4% -3.02% (p=0.000 n=100+99)
Tar 106ms ± 5% 103ms ± 4% -3.30% (p=0.000 n=98+94)
XML 198ms ± 5% 192ms ± 4% -2.88% (p=0.000 n=92+95)
[Geo mean] 178ms 173ms -2.65%
name old user-time/op new user-time/op delta
Template 229ms ± 5% 224ms ± 5% -2.36% (p=0.000 n=95+99)
Unicode 107ms ± 6% 106ms ± 5% -1.13% (p=0.001 n=93+95)
GoTypes 696ms ± 4% 679ms ± 4% -2.45% (p=0.000 n=97+99)
Flate 137ms ± 4% 134ms ± 5% -2.66% (p=0.000 n=99+96)
GoParser 176ms ± 5% 172ms ± 8% -2.27% (p=0.000 n=98+100)
Reflect 430ms ± 6% 411ms ± 5% -4.46% (p=0.000 n=100+92)
Tar 128ms ±13% 123ms ±13% -4.21% (p=0.000 n=100+100)
XML 239ms ± 6% 233ms ± 6% -2.50% (p=0.000 n=95+97)
[Geo mean] 220ms 213ms -2.76%
Change-Id: I15c7d6268347f8358e75066dfdbd77db24e8d0c1
Reviewed-on: https://go-review.googlesource.com/42145
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2017-04-28 14:12:28 -07:00
(Const64 <typ.UInt64> [63])))
2017-02-13 16:00:09 -08:00
// Unsigned mod by power of 2 constant.
2020-04-26 13:02:32 -07:00
(Mod8u <t> n (Const8 [c])) && isPowerOfTwo8(c) => (And8 n (Const8 <t> [c-1]))
(Mod16u <t> n (Const16 [c])) && isPowerOfTwo16(c) => (And16 n (Const16 <t> [c-1]))
(Mod32u <t> n (Const32 [c])) && isPowerOfTwo32(c) => (And32 n (Const32 <t> [c-1]))
(Mod64u <t> n (Const64 [c])) && isPowerOfTwo64(c) => (And64 n (Const64 <t> [c-1]))
(Mod64u <t> n (Const64 [-1<<63])) => (And64 n (Const64 <t> [1<<63-1]))
2017-02-13 16:00:09 -08:00
cmd/compile: optimize signed non-negative div/mod by a power of 2
This CL optimizes assembly for len() or cap() division
by a power of 2 constants:
func lenDiv(s []int) int {
return len(s) / 16
}
amd64 assembly before the CL:
MOVQ "".s+16(SP), AX
MOVQ AX, CX
SARQ $63, AX
SHRQ $60, AX
ADDQ CX, AX
SARQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
amd64 assembly after the CL:
MOVQ "".s+16(SP), AX
SHRQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
The CL relies on the fact that len() and cap() result cannot
be negative.
Trigger stats for the added SSA rules on linux/amd64 when running
make.bash:
46 Div64
12 Mod64
The added SSA rules may trigger on more cases in the future
when SSA values will be populated with the info on their
lower bounds.
For instance:
func f(i int16) int16 {
if i < 3 {
return -1
}
// Lower bound of i is 3 here -> i is non-negative,
// so unsigned arithmetics may be used here.
return i % 16
}
Change-Id: I8bc6be5a03e71157ced533c01416451ff6f1a7f0
Reviewed-on: https://go-review.googlesource.com/65530
Reviewed-by: Keith Randall <khr@golang.org>
2017-09-23 00:34:37 +03:00
// Signed non-negative mod by power of 2 constant.
2020-04-26 13:02:32 -07:00
(Mod8 <t> n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo8(c) => (And8 n (Const8 <t> [c-1]))
(Mod16 <t> n (Const16 [c])) && isNonNegative(n) && isPowerOfTwo16(c) => (And16 n (Const16 <t> [c-1]))
(Mod32 <t> n (Const32 [c])) && isNonNegative(n) && isPowerOfTwo32(c) => (And32 n (Const32 <t> [c-1]))
(Mod64 <t> n (Const64 [c])) && isNonNegative(n) && isPowerOfTwo64(c) => (And64 n (Const64 <t> [c-1]))
(Mod64 n (Const64 [-1<<63])) && isNonNegative(n) => n
cmd/compile: optimize signed non-negative div/mod by a power of 2
This CL optimizes assembly for len() or cap() division
by a power of 2 constants:
func lenDiv(s []int) int {
return len(s) / 16
}
amd64 assembly before the CL:
MOVQ "".s+16(SP), AX
MOVQ AX, CX
SARQ $63, AX
SHRQ $60, AX
ADDQ CX, AX
SARQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
amd64 assembly after the CL:
MOVQ "".s+16(SP), AX
SHRQ $4, AX
MOVQ AX, "".~r1+32(SP)
RET
The CL relies on the fact that len() and cap() result cannot
be negative.
Trigger stats for the added SSA rules on linux/amd64 when running
make.bash:
46 Div64
12 Mod64
The added SSA rules may trigger on more cases in the future
when SSA values will be populated with the info on their
lower bounds.
For instance:
func f(i int16) int16 {
if i < 3 {
return -1
}
// Lower bound of i is 3 here -> i is non-negative,
// so unsigned arithmetics may be used here.
return i % 16
}
Change-Id: I8bc6be5a03e71157ced533c01416451ff6f1a7f0
Reviewed-on: https://go-review.googlesource.com/65530
Reviewed-by: Keith Randall <khr@golang.org>
2017-09-23 00:34:37 +03:00
2017-02-13 16:00:09 -08:00
// Signed mod by negative constant.
2020-04-26 13:02:32 -07:00
(Mod8 <t> n (Const8 [c])) && c < 0 && c != -1<<7 => (Mod8 <t> n (Const8 <t> [-c]))
(Mod16 <t> n (Const16 [c])) && c < 0 && c != -1<<15 => (Mod16 <t> n (Const16 <t> [-c]))
(Mod32 <t> n (Const32 [c])) && c < 0 && c != -1<<31 => (Mod32 <t> n (Const32 <t> [-c]))
(Mod64 <t> n (Const64 [c])) && c < 0 && c != -1<<63 => (Mod64 <t> n (Const64 <t> [-c]))
2016-02-05 20:26:18 -08:00
2017-02-13 16:00:09 -08:00
// All other mods by constants, do A%B = A-(A/B*B).
2016-02-05 20:26:18 -08:00
// This implements % with two * and a bunch of ancillary ops.
// One of the * is free if the user's code also computes A/B.
2019-04-23 20:49:05 +00:00
(Mod8 <t> x (Const8 [c])) && x.Op != OpConst8 && (c > 0 || c == -1<<7)
2020-04-26 13:02:32 -07:00
=> (Sub8 x (Mul8 <t> (Div8 <t> x (Const8 <t> [c])) (Const8 <t> [c])))
2019-04-23 20:49:05 +00:00
(Mod16 <t> x (Const16 [c])) && x.Op != OpConst16 && (c > 0 || c == -1<<15)
2020-04-26 13:02:32 -07:00
=> (Sub16 x (Mul16 <t> (Div16 <t> x (Const16 <t> [c])) (Const16 <t> [c])))
2019-04-23 20:49:05 +00:00
(Mod32 <t> x (Const32 [c])) && x.Op != OpConst32 && (c > 0 || c == -1<<31)
2020-04-26 13:02:32 -07:00
=> (Sub32 x (Mul32 <t> (Div32 <t> x (Const32 <t> [c])) (Const32 <t> [c])))
2019-04-23 20:49:05 +00:00
(Mod64 <t> x (Const64 [c])) && x.Op != OpConst64 && (c > 0 || c == -1<<63)
2020-04-26 13:02:32 -07:00
=> (Sub64 x (Mul64 <t> (Div64 <t> x (Const64 <t> [c])) (Const64 <t> [c])))
(Mod8u <t> x (Const8 [c])) && x.Op != OpConst8 && c > 0 && umagicOK8( c)
=> (Sub8 x (Mul8 <t> (Div8u <t> x (Const8 <t> [c])) (Const8 <t> [c])))
(Mod16u <t> x (Const16 [c])) && x.Op != OpConst16 && c > 0 && umagicOK16(c)
=> (Sub16 x (Mul16 <t> (Div16u <t> x (Const16 <t> [c])) (Const16 <t> [c])))
(Mod32u <t> x (Const32 [c])) && x.Op != OpConst32 && c > 0 && umagicOK32(c)
=> (Sub32 x (Mul32 <t> (Div32u <t> x (Const32 <t> [c])) (Const32 <t> [c])))
(Mod64u <t> x (Const64 [c])) && x.Op != OpConst64 && c > 0 && umagicOK64(c)
=> (Sub64 x (Mul64 <t> (Div64u <t> x (Const64 <t> [c])) (Const64 <t> [c])))
2016-07-06 10:04:45 -04:00
2019-03-09 21:58:16 -07:00
// For architectures without rotates on less than 32-bits, promote these checks to 32-bit.
2020-04-28 19:28:43 -07:00
(Eq8 (Mod8u x (Const8 [c])) (Const8 [0])) && x.Op != OpConst8 && udivisibleOK8(c) && !hasSmallRotate(config) =>
(Eq32 (Mod32u <typ.UInt32> (ZeroExt8to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(uint8(c))])) (Const32 <typ.UInt32> [0]))
(Eq16 (Mod16u x (Const16 [c])) (Const16 [0])) && x.Op != OpConst16 && udivisibleOK16(c) && !hasSmallRotate(config) =>
(Eq32 (Mod32u <typ.UInt32> (ZeroExt16to32 <typ.UInt32> x) (Const32 <typ.UInt32> [int32(uint16(c))])) (Const32 <typ.UInt32> [0]))
(Eq8 (Mod8 x (Const8 [c])) (Const8 [0])) && x.Op != OpConst8 && sdivisibleOK8(c) && !hasSmallRotate(config) =>
(Eq32 (Mod32 <typ.Int32> (SignExt8to32 <typ.Int32> x) (Const32 <typ.Int32> [int32(c)])) (Const32 <typ.Int32> [0]))
(Eq16 (Mod16 x (Const16 [c])) (Const16 [0])) && x.Op != OpConst16 && sdivisibleOK16(c) && !hasSmallRotate(config) =>
(Eq32 (Mod32 <typ.Int32> (SignExt16to32 <typ.Int32> x) (Const32 <typ.Int32> [int32(c)])) (Const32 <typ.Int32> [0]))
2019-03-09 21:58:16 -07:00
// Divisibility checks x%c == 0 convert to multiply and rotate.
// Note, x%c == 0 is rewritten as x == c*(x/c) during the opt pass
2019-11-15 19:49:30 +00:00
// where (x/c) is performed using multiplication with magic constants.
2019-03-09 21:58:16 -07:00
// To rewrite x%c == 0 requires pattern matching the rewritten expression
// and checking that the division by the same constant wasn't already calculated.
// This check is made by counting uses of the magic constant multiplication.
// Note that if there were an intermediate opt pass, this rule could be applied
// directly on the Div op and magic division rewrites could be delayed to late opt.
2019-04-05 14:05:07 -06:00
// Unsigned divisibility checks convert to multiply and rotate.
2019-03-09 21:58:16 -07:00
(Eq8 x (Mul8 (Const8 [c])
(Trunc32to8
(Rsh32Ux64
mul:(Mul32
(Const32 [m])
(ZeroExt8to32 x))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(1<<8+umagic8(c).m) && s == 8+umagic8(c).s
&& x.Op != OpConst8 && udivisibleOK8(c)
=> (Leq8U
2019-03-09 21:58:16 -07:00
(RotateLeft8 <typ.UInt8>
(Mul8 <typ.UInt8>
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(udivisible8(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(8-udivisible8(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(udivisible8(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq16 x (Mul16 (Const16 [c])
(Trunc64to16
(Rsh64Ux64
mul:(Mul64
(Const64 [m])
(ZeroExt16to64 x))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(1<<16+umagic16(c).m) && s == 16+umagic16(c).s
&& x.Op != OpConst16 && udivisibleOK16(c)
=> (Leq16U
2019-03-09 21:58:16 -07:00
(RotateLeft16 <typ.UInt16>
(Mul16 <typ.UInt16>
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(16-udivisible16(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq16 x (Mul16 (Const16 [c])
(Trunc32to16
(Rsh32Ux64
mul:(Mul32
(Const32 [m])
(ZeroExt16to32 x))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(1<<15+umagic16(c).m/2) && s == 16+umagic16(c).s-1
&& x.Op != OpConst16 && udivisibleOK16(c)
=> (Leq16U
2019-03-09 21:58:16 -07:00
(RotateLeft16 <typ.UInt16>
(Mul16 <typ.UInt16>
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(16-udivisible16(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq16 x (Mul16 (Const16 [c])
(Trunc32to16
(Rsh32Ux64
mul:(Mul32
(Const32 [m])
(Rsh32Ux64 (ZeroExt16to32 x) (Const64 [1])))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(1<<15+(umagic16(c).m+1)/2) && s == 16+umagic16(c).s-2
&& x.Op != OpConst16 && udivisibleOK16(c)
=> (Leq16U
2019-03-09 21:58:16 -07:00
(RotateLeft16 <typ.UInt16>
(Mul16 <typ.UInt16>
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(16-udivisible16(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq16 x (Mul16 (Const16 [c])
(Trunc32to16
(Rsh32Ux64
(Avg32u
(Lsh32x64 (ZeroExt16to32 x) (Const64 [16]))
mul:(Mul32
(Const32 [m])
(ZeroExt16to32 x)))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(umagic16(c).m) && s == 16+umagic16(c).s-1
&& x.Op != OpConst16 && udivisibleOK16(c)
=> (Leq16U
2019-03-09 21:58:16 -07:00
(RotateLeft16 <typ.UInt16>
(Mul16 <typ.UInt16>
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(16-udivisible16(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(udivisible16(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Rsh32Ux64
mul:(Hmul32u
(Const32 [m])
x)
(Const64 [s]))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(1<<31+umagic32(c).m/2) && s == umagic32(c).s-1
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Rsh32Ux64
mul:(Hmul32u
(Const32 <typ.UInt32> [m])
(Rsh32Ux64 x (Const64 [1])))
(Const64 [s]))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(1<<31+(umagic32(c).m+1)/2) && s == umagic32(c).s-2
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Rsh32Ux64
(Avg32u
x
mul:(Hmul32u
(Const32 [m])
x))
(Const64 [s]))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(umagic32(c).m) && s == umagic32(c).s-1
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Trunc64to32
(Rsh64Ux64
mul:(Mul64
(Const64 [m])
(ZeroExt32to64 x))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(1<<31+umagic32(c).m/2) && s == 32+umagic32(c).s-1
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Trunc64to32
(Rsh64Ux64
mul:(Mul64
(Const64 [m])
(Rsh64Ux64 (ZeroExt32to64 x) (Const64 [1])))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(1<<31+(umagic32(c).m+1)/2) && s == 32+umagic32(c).s-2
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq32 x (Mul32 (Const32 [c])
(Trunc64to32
(Rsh64Ux64
(Avg64u
(Lsh64x64 (ZeroExt32to64 x) (Const64 [32]))
mul:(Mul64
(Const64 [m])
(ZeroExt32to64 x)))
(Const64 [s])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(umagic32(c).m) && s == 32+umagic32(c).s-1
&& x.Op != OpConst32 && udivisibleOK32(c)
=> (Leq32U
2019-03-09 21:58:16 -07:00
(RotateLeft32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-udivisible32(c).k)])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(udivisible32(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq64 x (Mul64 (Const64 [c])
(Rsh64Ux64
mul:(Hmul64u
(Const64 [m])
x)
(Const64 [s]))
)
) && v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(1<<63+umagic64(c).m/2) && s == umagic64(c).s-1
&& x.Op != OpConst64 && udivisibleOK64(c)
=> (Leq64U
2019-03-09 21:58:16 -07:00
(RotateLeft64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [64-udivisible64(c).k])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq64 x (Mul64 (Const64 [c])
(Rsh64Ux64
mul:(Hmul64u
(Const64 [m])
(Rsh64Ux64 x (Const64 [1])))
(Const64 [s]))
)
) && v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(1<<63+(umagic64(c).m+1)/2) && s == umagic64(c).s-2
&& x.Op != OpConst64 && udivisibleOK64(c)
=> (Leq64U
2019-03-09 21:58:16 -07:00
(RotateLeft64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [64-udivisible64(c).k])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).max)])
2019-03-09 21:58:16 -07:00
)
(Eq64 x (Mul64 (Const64 [c])
(Rsh64Ux64
(Avg64u
x
mul:(Hmul64u
(Const64 [m])
x))
(Const64 [s]))
)
) && v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(umagic64(c).m) && s == umagic64(c).s-1
&& x.Op != OpConst64 && udivisibleOK64(c)
=> (Leq64U
2019-03-09 21:58:16 -07:00
(RotateLeft64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).m)])
2019-03-09 21:58:16 -07:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [64-udivisible64(c).k])
2019-03-09 21:58:16 -07:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(udivisible64(c).max)])
2019-03-09 21:58:16 -07:00
)
2019-04-05 14:05:07 -06:00
// Signed divisibility checks convert to multiply, add and rotate.
(Eq8 x (Mul8 (Const8 [c])
(Sub8
(Rsh32x64
mul:(Mul32
(Const32 [m])
(SignExt8to32 x))
(Const64 [s]))
(Rsh32x64
(SignExt8to32 x)
(Const64 [31])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(smagic8(c).m) && s == 8+smagic8(c).s
&& x.Op != OpConst8 && sdivisibleOK8(c)
=> (Leq8U
2019-04-05 14:05:07 -06:00
(RotateLeft8 <typ.UInt8>
(Add8 <typ.UInt8>
(Mul8 <typ.UInt8>
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(sdivisible8(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(sdivisible8(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(8-sdivisible8(c).k)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const8 <typ.UInt8> [int8(sdivisible8(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq16 x (Mul16 (Const16 [c])
(Sub16
(Rsh32x64
mul:(Mul32
(Const32 [m])
(SignExt16to32 x))
(Const64 [s]))
(Rsh32x64
(SignExt16to32 x)
(Const64 [31])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(smagic16(c).m) && s == 16+smagic16(c).s
&& x.Op != OpConst16 && sdivisibleOK16(c)
=> (Leq16U
2019-04-05 14:05:07 -06:00
(RotateLeft16 <typ.UInt16>
(Add16 <typ.UInt16>
(Mul16 <typ.UInt16>
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(sdivisible16(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(sdivisible16(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(16-sdivisible16(c).k)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const16 <typ.UInt16> [int16(sdivisible16(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq32 x (Mul32 (Const32 [c])
(Sub32
(Rsh64x64
mul:(Mul64
(Const64 [m])
(SignExt32to64 x))
(Const64 [s]))
(Rsh64x64
(SignExt32to64 x)
(Const64 [63])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(smagic32(c).m) && s == 32+smagic32(c).s
&& x.Op != OpConst32 && sdivisibleOK32(c)
=> (Leq32U
2019-04-05 14:05:07 -06:00
(RotateLeft32 <typ.UInt32>
(Add32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq32 x (Mul32 (Const32 [c])
(Sub32
(Rsh32x64
mul:(Hmul32
(Const32 [m])
x)
(Const64 [s]))
(Rsh32x64
x
(Const64 [31])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(smagic32(c).m/2) && s == smagic32(c).s-1
&& x.Op != OpConst32 && sdivisibleOK32(c)
=> (Leq32U
2019-04-05 14:05:07 -06:00
(RotateLeft32 <typ.UInt32>
(Add32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq32 x (Mul32 (Const32 [c])
(Sub32
(Rsh32x64
(Add32
mul:(Hmul32
(Const32 [m])
x)
x)
(Const64 [s]))
(Rsh32x64
x
(Const64 [31])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int32(smagic32(c).m) && s == smagic32(c).s
&& x.Op != OpConst32 && sdivisibleOK32(c)
=> (Leq32U
2019-04-05 14:05:07 -06:00
(RotateLeft32 <typ.UInt32>
(Add32 <typ.UInt32>
(Mul32 <typ.UInt32>
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(32-sdivisible32(c).k)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const32 <typ.UInt32> [int32(sdivisible32(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq64 x (Mul64 (Const64 [c])
(Sub64
(Rsh64x64
mul:(Hmul64
(Const64 [m])
x)
(Const64 [s]))
(Rsh64x64
x
(Const64 [63])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(smagic64(c).m/2) && s == smagic64(c).s-1
&& x.Op != OpConst64 && sdivisibleOK64(c)
=> (Leq64U
2019-04-05 14:05:07 -06:00
(RotateLeft64 <typ.UInt64>
(Add64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [64-sdivisible64(c).k])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).max)])
2019-04-05 14:05:07 -06:00
)
(Eq64 x (Mul64 (Const64 [c])
(Sub64
(Rsh64x64
(Add64
mul:(Hmul64
(Const64 [m])
x)
x)
(Const64 [s]))
(Rsh64x64
x
(Const64 [63])))
)
)
&& v.Block.Func.pass.name != "opt" && mul.Uses == 1
2020-04-28 19:28:43 -07:00
&& m == int64(smagic64(c).m) && s == smagic64(c).s
&& x.Op != OpConst64 && sdivisibleOK64(c)
=> (Leq64U
2019-04-05 14:05:07 -06:00
(RotateLeft64 <typ.UInt64>
(Add64 <typ.UInt64>
(Mul64 <typ.UInt64>
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).m)])
2019-04-05 14:05:07 -06:00
x)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).a)])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [64-sdivisible64(c).k])
2019-04-05 14:05:07 -06:00
)
2020-04-28 19:28:43 -07:00
(Const64 <typ.UInt64> [int64(sdivisible64(c).max)])
2019-04-05 14:05:07 -06:00
)
2019-04-23 22:04:38 -06:00
// Divisibility check for signed integers for power of two constant are simple mask.
2019-04-05 14:05:07 -06:00
// However, we must match against the rewritten n%c == 0 -> n - c*(n/c) == 0 -> n == c*(n/c)
2019-04-23 22:04:38 -06:00
// where n/c contains fixup code to handle signed n.
2019-09-08 21:50:07 -06:00
((Eq8|Neq8) n (Lsh8x64
2019-04-23 22:04:38 -06:00
(Rsh8x64
(Add8 <t> n (Rsh8Ux64 <t> (Rsh8x64 <t> n (Const64 <typ.UInt64> [ 7])) (Const64 <typ.UInt64> [kbar])))
(Const64 <typ.UInt64> [k]))
(Const64 <typ.UInt64> [k]))
) && k > 0 && k < 7 && kbar == 8 - k
2020-04-26 13:02:32 -07:00
=> ((Eq8|Neq8) (And8 <t> n (Const8 <t> [1<<uint(k)-1])) (Const8 <t> [0]))
2019-04-23 22:04:38 -06:00
2019-09-08 21:50:07 -06:00
((Eq16|Neq16) n (Lsh16x64
2019-04-23 22:04:38 -06:00
(Rsh16x64
(Add16 <t> n (Rsh16Ux64 <t> (Rsh16x64 <t> n (Const64 <typ.UInt64> [15])) (Const64 <typ.UInt64> [kbar])))
(Const64 <typ.UInt64> [k]))
(Const64 <typ.UInt64> [k]))
) && k > 0 && k < 15 && kbar == 16 - k
2020-04-26 13:02:32 -07:00
=> ((Eq16|Neq16) (And16 <t> n (Const16 <t> [1<<uint(k)-1])) (Const16 <t> [0]))
2019-04-23 22:04:38 -06:00
2019-09-08 21:50:07 -06:00
((Eq32|Neq32) n (Lsh32x64
2019-04-23 22:04:38 -06:00
(Rsh32x64
(Add32 <t> n (Rsh32Ux64 <t> (Rsh32x64 <t> n (Const64 <typ.UInt64> [31])) (Const64 <typ.UInt64> [kbar])))
(Const64 <typ.UInt64> [k]))
(Const64 <typ.UInt64> [k]))
) && k > 0 && k < 31 && kbar == 32 - k
2020-04-26 13:02:32 -07:00
=> ((Eq32|Neq32) (And32 <t> n (Const32 <t> [1<<uint(k)-1])) (Const32 <t> [0]))
2019-04-23 22:04:38 -06:00
2019-09-08 21:50:07 -06:00
((Eq64|Neq64) n (Lsh64x64
2019-04-23 22:04:38 -06:00
(Rsh64x64
(Add64 <t> n (Rsh64Ux64 <t> (Rsh64x64 <t> n (Const64 <typ.UInt64> [63])) (Const64 <typ.UInt64> [kbar])))
(Const64 <typ.UInt64> [k]))
(Const64 <typ.UInt64> [k]))
) && k > 0 && k < 63 && kbar == 64 - k
2020-04-26 13:02:32 -07:00
=> ((Eq64|Neq64) (And64 <t> n (Const64 <t> [1<<uint(k)-1])) (Const64 <t> [0]))
2019-04-23 22:04:38 -06:00
2020-04-26 13:02:32 -07:00
(Eq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 => (Eq(8|16|32|64) x y)
(Neq(8|16|32|64) s:(Sub(8|16|32|64) x y) (Const(8|16|32|64) [0])) && s.Uses == 1 => (Neq(8|16|32|64) x y)
2018-04-27 08:50:39 -07:00
2019-05-08 17:02:23 +07:00
// Optimize bitsets
2020-04-20 14:43:30 -07:00
(Eq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [y])) && oneBit8(y)
2020-04-20 14:36:40 -07:00
=> (Neq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [0]))
2020-04-20 14:43:30 -07:00
(Eq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [y])) && oneBit16(y)
2020-04-20 14:36:40 -07:00
=> (Neq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [0]))
2020-04-20 14:43:30 -07:00
(Eq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [y])) && oneBit32(y)
2020-04-20 14:36:40 -07:00
=> (Neq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [0]))
2020-04-20 14:43:30 -07:00
(Eq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [y])) && oneBit64(y)
2020-04-20 14:36:40 -07:00
=> (Neq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [0]))
2020-04-20 14:43:30 -07:00
(Neq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [y])) && oneBit8(y)
2020-04-20 14:36:40 -07:00
=> (Eq8 (And8 <t> x (Const8 <t> [y])) (Const8 <t> [0]))
2020-04-20 14:43:30 -07:00
(Neq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [y])) && oneBit16(y)
2020-04-20 14:36:40 -07:00
=> (Eq16 (And16 <t> x (Const16 <t> [y])) (Const16 <t> [0]))
2020-04-20 14:43:30 -07:00
(Neq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [y])) && oneBit32(y)
2020-04-20 14:36:40 -07:00
=> (Eq32 (And32 <t> x (Const32 <t> [y])) (Const32 <t> [0]))
2020-04-20 14:43:30 -07:00
(Neq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [y])) && oneBit64(y)
2020-04-20 14:36:40 -07:00
=> (Eq64 (And64 <t> x (Const64 <t> [y])) (Const64 <t> [0]))
2019-05-08 17:02:23 +07:00
2017-02-20 08:43:54 -08:00
// Reassociate expressions involving
// constants such that constants come first,
// exposing obvious constant-folding opportunities.
2017-03-30 03:30:22 +00:00
// Reassociate (op (op y C) x) to (op C (op x y)) or similar, where C
2017-02-20 08:43:54 -08:00
// is constant, which pushes constants to the outside
// of the expression. At that point, any constant-folding
// opportunities should be obvious.
cmd/compile: convert 386 port to use addressing modes pass (take 2)
Retrying CL 222782, with a fix that will hopefully stop the random crashing.
The issue with the previous CL is that it does pointer arithmetic
in a way that may briefly generate an out-of-bounds pointer. If an
interrupt happens to occur in that state, the referenced object may
be collected incorrectly.
Suppose there was code that did s[x+c]. The previous CL had a rule
to the effect of ptr + (x + c) -> c + (ptr + x). But ptr+x is not
guaranteed to point to the same object as ptr. In contrast,
ptr+(x+c) is guaranteed to point to the same object as ptr, because
we would have already checked that x+c is in bounds.
For example, strconv.trim used to have this code:
MOVZX -0x1(BX)(DX*1), BP
CMPL $0x30, AL
After CL 222782, it had this code:
LEAL 0(BX)(DX*1), BP
CMPB $0x30, -0x1(BP)
An interrupt between those last two instructions could see BP pointing
outside the backing store of the slice involved.
It's really hard to actually demonstrate a bug. First, you need to
have an interrupt occur at exactly the right time. Then, there must
be no other pointers to the object in question. Since the interrupted
frame will be scanned conservatively, there can't even be a dead
pointer in another register or on the stack. (In the example above,
a bug can't happen because BX still holds the original pointer.)
Then, the object in question needs to be collected (or at least
scanned?) before the interrupted code continues.
This CL needs to handle load combining somewhat differently than CL 222782
because of the new restriction on arithmetic. That's the only real
difference (other than removing the bad rules) from that old CL.
This bug is also present in the amd64 rewrite rules, and we haven't
seen any crashing as a result. I will fix up that code similarly to
this one in a separate CL.
Update #37881
Change-Id: I5f0d584d9bef4696bfe89a61ef0a27c8d507329f
Reviewed-on: https://go-review.googlesource.com/c/go/+/225798
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2020-03-24 13:39:44 -07:00
// Note: don't include AddPtr here! In order to maintain the
// invariant that pointers must stay within the pointed-to object,
// we can't pull part of a pointer computation above the AddPtr.
// See issue 37881.
cmd/compile: add more generic rewrite rules to reassociate (op (op y C) x|C)
With this patch, opt pass can expose more obvious constant-folding
opportunites.
Example:
func test(i int) int {return (i+8)-(i+4)}
The previous version:
MOVD "".i(FP), R0
ADD $8, R0, R1
ADD $4, R0, R0
SUB R0, R1, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
The optimized version:
MOVD $4, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
This patch removes some existing reassociation rules, such as "x+(z-C)",
because the current generic rewrite rules will canonicalize "x-const"
to "x+(-const)", making "x+(z-C)" equal to "x+(z+(-C))".
This patch also adds test cases.
Change-Id: I857108ba0b5fcc18a879eeab38e2551bc4277797
Reviewed-on: https://go-review.googlesource.com/c/go/+/237137
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-05-28 18:11:52 +08:00
// Note: we don't need to handle any (x-C) cases because we already rewrite
// (x-C) to (x+(-C)).
2017-02-20 08:43:54 -08:00
// x + (C + z) -> C + (x + z)
2020-04-23 19:46:49 -07:00
(Add64 (Add64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Add64 <t> z x))
(Add32 (Add32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Add32 <t> z x))
(Add16 (Add16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Add16 <t> z x))
(Add8 (Add8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Add8 <t> z x))
2017-02-20 08:43:54 -08:00
// x + (C - z) -> C + (x - z)
2020-04-23 19:46:49 -07:00
(Add64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> x z))
(Add32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> x z))
(Add16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> x z))
(Add8 (Sub8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Sub8 <t> x z))
2017-02-20 08:43:54 -08:00
// x - (C - z) -> x + (z - C) -> (x + z) - C
2020-04-23 19:46:49 -07:00
(Sub64 x (Sub64 i:(Const64 <t>) z)) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Add64 <t> x z) i)
(Sub32 x (Sub32 i:(Const32 <t>) z)) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Add32 <t> x z) i)
(Sub16 x (Sub16 i:(Const16 <t>) z)) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Add16 <t> x z) i)
(Sub8 x (Sub8 i:(Const8 <t>) z)) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 (Add8 <t> x z) i)
2017-02-20 08:43:54 -08:00
cmd/compile: add more generic rewrite rules to reassociate (op (op y C) x|C)
With this patch, opt pass can expose more obvious constant-folding
opportunites.
Example:
func test(i int) int {return (i+8)-(i+4)}
The previous version:
MOVD "".i(FP), R0
ADD $8, R0, R1
ADD $4, R0, R0
SUB R0, R1, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
The optimized version:
MOVD $4, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
This patch removes some existing reassociation rules, such as "x+(z-C)",
because the current generic rewrite rules will canonicalize "x-const"
to "x+(-const)", making "x+(z-C)" equal to "x+(z+(-C))".
This patch also adds test cases.
Change-Id: I857108ba0b5fcc18a879eeab38e2551bc4277797
Reviewed-on: https://go-review.googlesource.com/c/go/+/237137
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-05-28 18:11:52 +08:00
// x - (z + C) -> x + (-z - C) -> (x - z) - C
(Sub64 x (Add64 z i:(Const64 <t>))) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 (Sub64 <t> x z) i)
(Sub32 x (Add32 z i:(Const32 <t>))) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 (Sub32 <t> x z) i)
(Sub16 x (Add16 z i:(Const16 <t>))) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 (Sub16 <t> x z) i)
(Sub8 x (Add8 z i:(Const8 <t>))) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 (Sub8 <t> x z) i)
// (C - z) - x -> C - (z + x)
(Sub64 (Sub64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Sub64 i (Add64 <t> z x))
(Sub32 (Sub32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Sub32 i (Add32 <t> z x))
(Sub16 (Sub16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Sub16 i (Add16 <t> z x))
(Sub8 (Sub8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Sub8 i (Add8 <t> z x))
// (z + C) -x -> C + (z - x)
(Sub64 (Add64 z i:(Const64 <t>)) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Add64 i (Sub64 <t> z x))
(Sub32 (Add32 z i:(Const32 <t>)) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Add32 i (Sub32 <t> z x))
(Sub16 (Add16 z i:(Const16 <t>)) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Add16 i (Sub16 <t> z x))
(Sub8 (Add8 z i:(Const8 <t>)) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Add8 i (Sub8 <t> z x))
2017-02-20 08:43:54 -08:00
// x & (C & z) -> C & (x & z)
2020-04-23 19:46:49 -07:00
(And64 (And64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (And64 i (And64 <t> z x))
(And32 (And32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (And32 i (And32 <t> z x))
(And16 (And16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (And16 i (And16 <t> z x))
(And8 (And8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (And8 i (And8 <t> z x))
2017-02-20 08:43:54 -08:00
// x | (C | z) -> C | (x | z)
2020-04-23 19:46:49 -07:00
(Or64 (Or64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Or64 i (Or64 <t> z x))
(Or32 (Or32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Or32 i (Or32 <t> z x))
(Or16 (Or16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Or16 i (Or16 <t> z x))
(Or8 (Or8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Or8 i (Or8 <t> z x))
2017-02-20 08:43:54 -08:00
// x ^ (C ^ z) -> C ^ (x ^ z)
2020-04-23 19:46:49 -07:00
(Xor64 (Xor64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Xor64 i (Xor64 <t> z x))
(Xor32 (Xor32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Xor32 i (Xor32 <t> z x))
(Xor16 (Xor16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Xor16 i (Xor16 <t> z x))
(Xor8 (Xor8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Xor8 i (Xor8 <t> z x))
2017-02-20 08:43:54 -08:00
cmd/compile: add more generic rewrite rules to reassociate (op (op y C) x|C)
With this patch, opt pass can expose more obvious constant-folding
opportunites.
Example:
func test(i int) int {return (i+8)-(i+4)}
The previous version:
MOVD "".i(FP), R0
ADD $8, R0, R1
ADD $4, R0, R0
SUB R0, R1, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
The optimized version:
MOVD $4, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
This patch removes some existing reassociation rules, such as "x+(z-C)",
because the current generic rewrite rules will canonicalize "x-const"
to "x+(-const)", making "x+(z-C)" equal to "x+(z+(-C))".
This patch also adds test cases.
Change-Id: I857108ba0b5fcc18a879eeab38e2551bc4277797
Reviewed-on: https://go-review.googlesource.com/c/go/+/237137
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-05-28 18:11:52 +08:00
// x * (D * z) = D * (x * z)
(Mul64 (Mul64 i:(Const64 <t>) z) x) && (z.Op != OpConst64 && x.Op != OpConst64) => (Mul64 i (Mul64 <t> x z))
(Mul32 (Mul32 i:(Const32 <t>) z) x) && (z.Op != OpConst32 && x.Op != OpConst32) => (Mul32 i (Mul32 <t> x z))
(Mul16 (Mul16 i:(Const16 <t>) z) x) && (z.Op != OpConst16 && x.Op != OpConst16) => (Mul16 i (Mul16 <t> x z))
(Mul8 (Mul8 i:(Const8 <t>) z) x) && (z.Op != OpConst8 && x.Op != OpConst8) => (Mul8 i (Mul8 <t> x z))
2017-02-20 08:43:54 -08:00
// C + (D + x) -> (C + D) + x
2020-04-23 22:13:29 -07:00
(Add64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c+d]) x)
(Add32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c+d]) x)
(Add16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Add16 (Const16 <t> [c+d]) x)
(Add8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Add8 (Const8 <t> [c+d]) x)
2017-02-20 08:43:54 -08:00
// C + (D - x) -> (C + D) - x
2020-04-23 22:13:29 -07:00
(Add64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) => (Sub64 (Const64 <t> [c+d]) x)
(Add32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) => (Sub32 (Const32 <t> [c+d]) x)
(Add16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> [c+d]) x)
(Add8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) => (Sub8 (Const8 <t> [c+d]) x)
2017-02-20 08:43:54 -08:00
// C - (D - x) -> (C - D) + x
2020-04-23 22:13:29 -07:00
(Sub64 (Const64 <t> [c]) (Sub64 (Const64 <t> [d]) x)) => (Add64 (Const64 <t> [c-d]) x)
(Sub32 (Const32 <t> [c]) (Sub32 (Const32 <t> [d]) x)) => (Add32 (Const32 <t> [c-d]) x)
(Sub16 (Const16 <t> [c]) (Sub16 (Const16 <t> [d]) x)) => (Add16 (Const16 <t> [c-d]) x)
(Sub8 (Const8 <t> [c]) (Sub8 (Const8 <t> [d]) x)) => (Add8 (Const8 <t> [c-d]) x)
2017-02-20 08:43:54 -08:00
cmd/compile: add more generic rewrite rules to reassociate (op (op y C) x|C)
With this patch, opt pass can expose more obvious constant-folding
opportunites.
Example:
func test(i int) int {return (i+8)-(i+4)}
The previous version:
MOVD "".i(FP), R0
ADD $8, R0, R1
ADD $4, R0, R0
SUB R0, R1, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
The optimized version:
MOVD $4, R0
MOVD R0, "".~r1+8(FP)
RET (R30)
This patch removes some existing reassociation rules, such as "x+(z-C)",
because the current generic rewrite rules will canonicalize "x-const"
to "x+(-const)", making "x+(z-C)" equal to "x+(z+(-C))".
This patch also adds test cases.
Change-Id: I857108ba0b5fcc18a879eeab38e2551bc4277797
Reviewed-on: https://go-review.googlesource.com/c/go/+/237137
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-05-28 18:11:52 +08:00
// C - (D + x) -> (C - D) - x
(Sub64 (Const64 <t> [c]) (Add64 (Const64 <t> [d]) x)) => (Sub64 (Const64 <t> [c-d]) x)
(Sub32 (Const32 <t> [c]) (Add32 (Const32 <t> [d]) x)) => (Sub32 (Const32 <t> [c-d]) x)
(Sub16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Sub16 (Const16 <t> [c-d]) x)
(Sub8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Sub8 (Const8 <t> [c-d]) x)
2017-02-20 08:43:54 -08:00
// C & (D & x) -> (C & D) & x
2020-04-23 22:13:29 -07:00
(And64 (Const64 <t> [c]) (And64 (Const64 <t> [d]) x)) => (And64 (Const64 <t> [c&d]) x)
(And32 (Const32 <t> [c]) (And32 (Const32 <t> [d]) x)) => (And32 (Const32 <t> [c&d]) x)
(And16 (Const16 <t> [c]) (And16 (Const16 <t> [d]) x)) => (And16 (Const16 <t> [c&d]) x)
(And8 (Const8 <t> [c]) (And8 (Const8 <t> [d]) x)) => (And8 (Const8 <t> [c&d]) x)
2017-02-20 08:43:54 -08:00
// C | (D | x) -> (C | D) | x
2020-04-23 22:13:29 -07:00
(Or64 (Const64 <t> [c]) (Or64 (Const64 <t> [d]) x)) => (Or64 (Const64 <t> [c|d]) x)
(Or32 (Const32 <t> [c]) (Or32 (Const32 <t> [d]) x)) => (Or32 (Const32 <t> [c|d]) x)
(Or16 (Const16 <t> [c]) (Or16 (Const16 <t> [d]) x)) => (Or16 (Const16 <t> [c|d]) x)
(Or8 (Const8 <t> [c]) (Or8 (Const8 <t> [d]) x)) => (Or8 (Const8 <t> [c|d]) x)
2017-02-20 08:43:54 -08:00
// C ^ (D ^ x) -> (C ^ D) ^ x
2020-04-23 22:13:29 -07:00
(Xor64 (Const64 <t> [c]) (Xor64 (Const64 <t> [d]) x)) => (Xor64 (Const64 <t> [c^d]) x)
(Xor32 (Const32 <t> [c]) (Xor32 (Const32 <t> [d]) x)) => (Xor32 (Const32 <t> [c^d]) x)
(Xor16 (Const16 <t> [c]) (Xor16 (Const16 <t> [d]) x)) => (Xor16 (Const16 <t> [c^d]) x)
(Xor8 (Const8 <t> [c]) (Xor8 (Const8 <t> [d]) x)) => (Xor8 (Const8 <t> [c^d]) x)
2017-02-20 08:43:54 -08:00
// C * (D * x) = (C * D) * x
2020-04-23 22:13:29 -07:00
(Mul64 (Const64 <t> [c]) (Mul64 (Const64 <t> [d]) x)) => (Mul64 (Const64 <t> [c*d]) x)
(Mul32 (Const32 <t> [c]) (Mul32 (Const32 <t> [d]) x)) => (Mul32 (Const32 <t> [c*d]) x)
(Mul16 (Const16 <t> [c]) (Mul16 (Const16 <t> [d]) x)) => (Mul16 (Const16 <t> [c*d]) x)
(Mul8 (Const8 <t> [c]) (Mul8 (Const8 <t> [d]) x)) => (Mul8 (Const8 <t> [c*d]) x)
2017-02-20 08:43:54 -08:00
2016-07-06 10:04:45 -04:00
// floating point optimizations
2020-04-23 22:22:00 -07:00
(Mul(32|64)F x (Const(32|64)F [1])) => x
(Mul32F x (Const32F [-1])) => (Neg32F x)
(Mul64F x (Const64F [-1])) => (Neg64F x)
(Mul32F x (Const32F [2])) => (Add32F x x)
(Mul64F x (Const64F [2])) => (Add64F x x)
2018-02-22 14:32:57 +01:00
2020-04-23 22:22:00 -07:00
(Div32F x (Const32F <t> [c])) && reciprocalExact32(c) => (Mul32F x (Const32F <t> [1/c]))
(Div64F x (Const64F <t> [c])) && reciprocalExact64(c) => (Mul64F x (Const64F <t> [1/c]))
2016-08-20 22:05:47 -07:00
2020-12-07 19:15:15 +08:00
// rewrite single-precision sqrt expression "float32(math.Sqrt(float64(x)))"
(Cvt64Fto32F sqrt0:(Sqrt (Cvt32Fto64F x))) && sqrt0.Uses==1 => (Sqrt32 x)
2020-04-23 22:22:00 -07:00
(Sqrt (Const64F [c])) && !math.IsNaN(math.Sqrt(c)) => (Const64F [math.Sqrt(c)])
2016-08-26 15:41:51 -04:00
2020-08-13 20:43:39 -04:00
// for rewriting results of some late-expanded rewrites (below)
2021-03-22 11:35:02 +01:00
(SelectN [0] (MakeResult x ___)) => x
(SelectN [1] (MakeResult x y ___)) => y
(SelectN [2] (MakeResult x y z ___)) => z
2020-08-13 20:43:39 -04:00
// for late-expanded calls, recognize newobject and remove zeroing and nilchecks
2020-08-12 23:47:57 -04:00
(Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
&& isSameCall(call.Aux, "runtime.newobject")
=> mem
(Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call))
&& isConstZero(x)
&& isSameCall(call.Aux, "runtime.newobject")
=> mem
(Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call))
&& isConstZero(x)
&& isSameCall(call.Aux, "runtime.newobject")
=> mem
2021-02-04 16:42:35 -05:00
(NilCheck (SelectN [0] call:(StaticLECall _ _)) _)
2020-08-12 23:47:57 -04:00
&& isSameCall(call.Aux, "runtime.newobject")
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
=> (Invalid)
2021-02-04 16:42:35 -05:00
(NilCheck (OffPtr (SelectN [0] call:(StaticLECall _ _))) _)
2020-08-12 23:47:57 -04:00
&& isSameCall(call.Aux, "runtime.newobject")
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
=> (Invalid)
2020-08-13 20:43:39 -04:00
// for late-expanded calls, recognize memequal applied to a single constant byte
2021-06-16 16:25:57 +00:00
// Support is limited by 1, 2, 4, 8 byte sizes
2020-08-13 20:43:39 -04:00
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [1]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
=> (MakeResult (Eq8 (Load <typ.Int8> sptr mem) (Const8 <typ.Int8> [int8(read8(scon,0))])) mem)
2021-06-16 16:25:57 +00:00
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [2]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
&& canLoadUnaligned(config)
=> (MakeResult (Eq16 (Load <typ.Int16> sptr mem) (Const16 <typ.Int16> [int16(read16(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [4]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
&& canLoadUnaligned(config)
=> (MakeResult (Eq32 (Load <typ.Int32> sptr mem) (Const32 <typ.Int32> [int32(read32(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
(StaticLECall {callAux} sptr (Addr {scon} (SB)) (Const64 [8]) mem)
&& isSameCall(callAux, "runtime.memequal")
&& symIsRO(scon)
&& canLoadUnaligned(config) && config.PtrSize == 8
=> (MakeResult (Eq64 (Load <typ.Int64> sptr mem) (Const64 <typ.Int64> [int64(read64(scon,0,config.ctxt.Arch.ByteOrder))])) mem)
2022-11-30 09:45:29 +01:00
// Recognise make([]T, 0) and replace it with a pointer to the zerobase
(SelectN [0] call:(StaticLECall _ (Const(64|32) [0]) (Const(64|32) [0]) _))
&& isSameCall(call.Aux, "runtime.makeslice")
&& clobberIfDead(call)
=> (Addr {ir.Syms.Zerobase} (SB))
(SelectN [1] call:(StaticLECall _ (Const(64|32) [0]) (Const(64|32) [0]) mem))
&& isSameCall(call.Aux, "runtime.makeslice")
&& clobberIfDead(call)
=> mem
2018-04-12 15:24:08 +01:00
// Evaluate constant address comparisons.
2020-04-23 22:31:32 -07:00
(EqPtr x x) => (ConstBool [true])
(NeqPtr x x) => (ConstBool [false])
2021-03-22 11:35:02 +01:00
(EqPtr (Addr {x} _) (Addr {y} _)) => (ConstBool [x == y])
(EqPtr (Addr {x} _) (OffPtr [o] (Addr {y} _))) => (ConstBool [x == y && o == 0])
(EqPtr (OffPtr [o1] (Addr {x} _)) (OffPtr [o2] (Addr {y} _))) => (ConstBool [x == y && o1 == o2])
(NeqPtr (Addr {x} _) (Addr {y} _)) => (ConstBool [x != y])
(NeqPtr (Addr {x} _) (OffPtr [o] (Addr {y} _))) => (ConstBool [x != y || o != 0])
(NeqPtr (OffPtr [o1] (Addr {x} _)) (OffPtr [o2] (Addr {y} _))) => (ConstBool [x != y || o1 != o2])
(EqPtr (LocalAddr {x} _ _) (LocalAddr {y} _ _)) => (ConstBool [x == y])
(EqPtr (LocalAddr {x} _ _) (OffPtr [o] (LocalAddr {y} _ _))) => (ConstBool [x == y && o == 0])
(EqPtr (OffPtr [o1] (LocalAddr {x} _ _)) (OffPtr [o2] (LocalAddr {y} _ _))) => (ConstBool [x == y && o1 == o2])
(NeqPtr (LocalAddr {x} _ _) (LocalAddr {y} _ _)) => (ConstBool [x != y])
(NeqPtr (LocalAddr {x} _ _) (OffPtr [o] (LocalAddr {y} _ _))) => (ConstBool [x != y || o != 0])
(NeqPtr (OffPtr [o1] (LocalAddr {x} _ _)) (OffPtr [o2] (LocalAddr {y} _ _))) => (ConstBool [x != y || o1 != o2])
2020-04-23 22:31:32 -07:00
(EqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) => (ConstBool [o1 == 0])
(NeqPtr (OffPtr [o1] p1) p2) && isSamePtr(p1, p2) => (ConstBool [o1 != 0])
(EqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) => (ConstBool [o1 == o2])
(NeqPtr (OffPtr [o1] p1) (OffPtr [o2] p2)) && isSamePtr(p1, p2) => (ConstBool [o1 != o2])
(EqPtr (Const(32|64) [c]) (Const(32|64) [d])) => (ConstBool [c == d])
(NeqPtr (Const(32|64) [c]) (Const(32|64) [d])) => (ConstBool [c != d])
(EqPtr (LocalAddr _ _) (Addr _)) => (ConstBool [false])
(EqPtr (OffPtr (LocalAddr _ _)) (Addr _)) => (ConstBool [false])
(EqPtr (LocalAddr _ _) (OffPtr (Addr _))) => (ConstBool [false])
(EqPtr (OffPtr (LocalAddr _ _)) (OffPtr (Addr _))) => (ConstBool [false])
(NeqPtr (LocalAddr _ _) (Addr _)) => (ConstBool [true])
(NeqPtr (OffPtr (LocalAddr _ _)) (Addr _)) => (ConstBool [true])
(NeqPtr (LocalAddr _ _) (OffPtr (Addr _))) => (ConstBool [true])
(NeqPtr (OffPtr (LocalAddr _ _)) (OffPtr (Addr _))) => (ConstBool [true])
2018-07-03 11:34:38 -04:00
2018-04-12 15:24:08 +01:00
// Simplify address comparisons.
2020-04-23 22:31:32 -07:00
(EqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) => (Not (IsNonNil o1))
(NeqPtr (AddPtr p1 o1) p2) && isSamePtr(p1, p2) => (IsNonNil o1)
(EqPtr (Const(32|64) [0]) p) => (Not (IsNonNil p))
(NeqPtr (Const(32|64) [0]) p) => (IsNonNil p)
(EqPtr (ConstNil) p) => (Not (IsNonNil p))
(NeqPtr (ConstNil) p) => (IsNonNil p)
2018-04-12 15:24:08 +01:00
// Evaluate constant user nil checks.
2020-04-23 22:31:32 -07:00
(IsNonNil (ConstNil)) => (ConstBool [false])
(IsNonNil (Const(32|64) [c])) => (ConstBool [c != 0])
(IsNonNil (Addr _)) => (ConstBool [true])
(IsNonNil (LocalAddr _ _)) => (ConstBool [true])
2017-03-15 15:34:52 -07:00
2018-04-29 15:12:50 +01:00
// Inline small or disjoint runtime.memmove calls with constant length.
2020-02-24 16:16:27 -08:00
// See the comment in op Move in genericOps.go for discussion of the type.
2022-09-17 18:52:35 -04:00
//
// Note that we've lost any knowledge of the type and alignment requirements
// of the source and destination. We only know the size, and that the type
// contains no pointers.
// The type of the move is not necessarily v.Args[0].Type().Elem()!
// See issue 55122 for details.
//
2021-04-11 14:33:28 -04:00
// Because expand calls runs after prove, constants useful to this pattern may not appear.
// Both versions need to exist; the memory and register variants.
//
// Match post-expansion calls, memory version.
2021-02-04 16:42:35 -05:00
(SelectN [0] call:(StaticCall {sym} s1:(Store _ (Const(64|32) [sz]) s2:(Store _ src s3:(Store {t} _ dst mem)))))
2019-12-23 18:16:22 -08:00
&& sz >= 0
2020-06-12 13:48:26 -04:00
&& isSameCall(sym, "runtime.memmove")
2018-04-29 15:12:50 +01:00
&& s1.Uses == 1 && s2.Uses == 1 && s3.Uses == 1
2020-04-23 22:41:18 -07:00
&& isInlinableMemmove(dst, src, int64(sz), config)
2021-02-04 16:42:35 -05:00
&& clobber(s1, s2, s3, call)
2022-09-17 18:52:35 -04:00
=> (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem)
2017-08-09 14:00:38 -05:00
2021-04-11 14:33:28 -04:00
// Match post-expansion calls, register version.
(SelectN [0] call:(StaticCall {sym} dst src (Const(64|32) [sz]) mem))
&& sz >= 0
&& call.Uses == 1 // this will exclude all calls with results
&& isSameCall(sym, "runtime.memmove")
&& isInlinableMemmove(dst, src, int64(sz), config)
&& clobber(call)
2022-09-17 18:52:35 -04:00
=> (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem)
2021-04-11 14:33:28 -04:00
// Match pre-expansion calls.
2020-08-12 23:47:57 -04:00
(SelectN [0] call:(StaticLECall {sym} dst src (Const(64|32) [sz]) mem))
&& sz >= 0
&& call.Uses == 1 // this will exclude all calls with results
&& isSameCall(sym, "runtime.memmove")
&& isInlinableMemmove(dst, src, int64(sz), config)
&& clobber(call)
2022-09-17 18:52:35 -04:00
=> (Move {types.Types[types.TUINT8]} [int64(sz)] dst src mem)
2020-08-12 23:47:57 -04:00
2020-08-07 22:46:43 -04:00
// De-virtualize late-expanded interface calls into late-expanded static calls.
// Note that (ITab (IMake)) doesn't get rewritten until after the first opt pass,
// so this rule should trigger reliably.
// devirtLECall removes the first argument, adds the devirtualized symbol to the AuxCall, and changes the opcode
(InterLECall [argsize] {auxCall} (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) ___) && devirtLESym(v, auxCall, itab, off) !=
nil => devirtLECall(v, devirtLESym(v, auxCall, itab, off))
2018-04-11 22:47:24 +01:00
// Move and Zero optimizations.
// Move source and destination may overlap.
// Convert Moves into Zeros when the source is known to be zeros.
(Move {t} [n] dst1 src mem:(Zero {t} [n] dst2 _)) && isSamePtr(src, dst2)
2020-04-23 23:08:59 -07:00
=> (Zero {t} [n] dst1 mem)
2018-04-11 22:47:24 +01:00
(Move {t} [n] dst1 src mem:(VarDef (Zero {t} [n] dst0 _))) && isSamePtr(src, dst0)
2020-04-23 23:08:59 -07:00
=> (Zero {t} [n] dst1 mem)
2020-04-23 13:28:14 -07:00
(Move {t} [n] dst (Addr {sym} (SB)) mem) && symIsROZero(sym) => (Zero {t} [n] dst mem)
2018-04-11 22:47:24 +01:00
// Don't Store to variables that are about to be overwritten by Move/Zero.
(Zero {t1} [n] p1 store:(Store {t2} (OffPtr [o2] p2) _ mem))
&& isSamePtr(p1, p2) && store.Uses == 1
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
2018-04-11 22:47:24 +01:00
&& clobber(store)
2020-04-23 23:08:59 -07:00
=> (Zero {t1} [n] p1 mem)
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst1 src1 store:(Store {t2} op:(OffPtr [o2] dst2) _ mem))
&& isSamePtr(dst1, dst2) && store.Uses == 1
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& disjoint(src1, n, op, t2.Size())
2018-04-11 22:47:24 +01:00
&& clobber(store)
2020-04-23 23:08:59 -07:00
=> (Move {t1} [n] dst1 src1 mem)
2018-04-11 22:47:24 +01:00
// Don't Move to variables that are immediately completely overwritten.
(Zero {t} [n] dst1 move:(Move {t} [n] dst2 _ mem))
&& move.Uses == 1
&& isSamePtr(dst1, dst2)
&& clobber(move)
2020-04-23 23:08:59 -07:00
=> (Zero {t} [n] dst1 mem)
2018-04-11 22:47:24 +01:00
(Move {t} [n] dst1 src1 move:(Move {t} [n] dst2 _ mem))
&& move.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(move)
2020-04-23 23:08:59 -07:00
=> (Move {t} [n] dst1 src1 mem)
2018-04-11 22:47:24 +01:00
(Zero {t} [n] dst1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem)))
&& move.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2)
2020-02-24 17:30:44 -08:00
&& clobber(move, vardef)
2020-04-23 23:08:59 -07:00
=> (Zero {t} [n] dst1 (VarDef {x} mem))
2018-04-11 22:47:24 +01:00
(Move {t} [n] dst1 src1 vardef:(VarDef {x} move:(Move {t} [n] dst2 _ mem)))
&& move.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
2020-02-24 17:30:44 -08:00
&& clobber(move, vardef)
2020-04-23 23:08:59 -07:00
=> (Move {t} [n] dst1 src1 (VarDef {x} mem))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [0] p2) d2
m3:(Move [n] p3 _ mem)))
&& m2.Uses == 1 && m3.Uses == 1
2020-04-23 23:08:59 -07:00
&& o1 == t2.Size()
&& n == t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 mem))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [0] p3) d3
m4:(Move [n] p4 _ mem))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1
2020-04-23 23:08:59 -07:00
&& o2 == t3.Size()
&& o1-o2 == t2.Size()
&& n == t3.Size() + t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3, m4)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem)))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [o3] p3) d3
m4:(Store {t4} op4:(OffPtr [0] p4) d4
m5:(Move [n] p5 _ mem)))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1
2020-04-23 23:08:59 -07:00
&& o3 == t4.Size()
&& o2-o3 == t3.Size()
&& o1-o2 == t2.Size()
&& n == t4.Size() + t3.Size() + t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3, m4, m5)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 (Store {t4} op4 d4 mem))))
2018-04-11 22:47:24 +01:00
// Don't Zero variables that are immediately completely overwritten
// before being accessed.
(Move {t} [n] dst1 src1 zero:(Zero {t} [n] dst2 mem))
&& zero.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
&& clobber(zero)
2020-04-23 23:08:59 -07:00
=> (Move {t} [n] dst1 src1 mem)
2018-04-11 22:47:24 +01:00
(Move {t} [n] dst1 src1 vardef:(VarDef {x} zero:(Zero {t} [n] dst2 mem)))
&& zero.Uses == 1 && vardef.Uses == 1
&& isSamePtr(dst1, dst2) && disjoint(src1, n, dst2, n)
2020-02-24 17:30:44 -08:00
&& clobber(zero, vardef)
2020-04-23 23:08:59 -07:00
=> (Move {t} [n] dst1 src1 (VarDef {x} mem))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [0] p2) d2
m3:(Zero [n] p3 mem)))
&& m2.Uses == 1 && m3.Uses == 1
2020-04-23 23:08:59 -07:00
&& o1 == t2.Size()
&& n == t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 mem))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [0] p3) d3
m4:(Zero [n] p4 mem))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1
2020-04-23 23:08:59 -07:00
&& o2 == t3.Size()
&& o1-o2 == t2.Size()
&& n == t3.Size() + t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3, m4)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 mem)))
2018-04-11 22:47:24 +01:00
(Store {t1} op1:(OffPtr [o1] p1) d1
m2:(Store {t2} op2:(OffPtr [o2] p2) d2
m3:(Store {t3} op3:(OffPtr [o3] p3) d3
m4:(Store {t4} op4:(OffPtr [0] p4) d4
m5:(Zero [n] p5 mem)))))
&& m2.Uses == 1 && m3.Uses == 1 && m4.Uses == 1 && m5.Uses == 1
2020-04-23 23:08:59 -07:00
&& o3 == t4.Size()
&& o2-o3 == t3.Size()
&& o1-o2 == t2.Size()
&& n == t4.Size() + t3.Size() + t2.Size() + t1.Size()
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-02-24 17:30:44 -08:00
&& clobber(m2, m3, m4, m5)
2020-04-23 23:08:59 -07:00
=> (Store {t1} op1 d1 (Store {t2} op2 d2 (Store {t3} op3 d3 (Store {t4} op4 d4 mem))))
2018-04-11 22:47:24 +01:00
// Don't Move from memory if the values are likely to already be
// in registers.
(Move {t1} [n] dst p1
2018-10-02 22:04:45 -04:00
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _)))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
2020-04-23 23:08:59 -07:00
&& o2 == t3.Size()
&& n == t2.Size() + t3.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [0] dst) d2 mem))
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
2018-10-02 22:04:45 -04:00
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _))))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
2020-04-23 23:08:59 -07:00
&& o3 == t4.Size()
&& o2-o3 == t3.Size()
&& n == t2.Size() + t3.Size() + t4.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [0] dst) d3 mem)))
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
2018-10-02 22:04:45 -04:00
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [o4] p4) d3
(Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _)))))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
2020-04-23 23:08:59 -07:00
&& o4 == t5.Size()
&& o3-o4 == t4.Size()
&& o2-o3 == t3.Size()
&& n == t2.Size() + t3.Size() + t4.Size() + t5.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [0] dst) d4 mem))))
2018-04-11 22:47:24 +01:00
// Same thing but with VarDef in the middle.
(Move {t1} [n] dst p1
mem:(VarDef
2018-10-02 22:04:45 -04:00
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [0] p3) d2 _))))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
2020-04-23 23:08:59 -07:00
&& o2 == t3.Size()
&& n == t2.Size() + t3.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [0] dst) d2 mem))
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(VarDef
2018-10-02 22:04:45 -04:00
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [0] p4) d3 _)))))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
2020-04-23 23:08:59 -07:00
&& o3 == t4.Size()
&& o2-o3 == t3.Size()
&& n == t2.Size() + t3.Size() + t4.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [0] dst) d3 mem)))
2018-04-11 22:47:24 +01:00
(Move {t1} [n] dst p1
mem:(VarDef
2018-10-02 22:04:45 -04:00
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Store {t3} op3:(OffPtr <tt3> [o3] p3) d2
(Store {t4} op4:(OffPtr <tt4> [o4] p4) d3
(Store {t5} op5:(OffPtr <tt5> [0] p5) d4 _))))))
2018-04-11 22:47:24 +01:00
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
2020-04-23 23:08:59 -07:00
&& o4 == t5.Size()
&& o3-o4 == t4.Size()
&& o2-o3 == t3.Size()
&& n == t2.Size() + t3.Size() + t4.Size() + t5.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-10-02 22:04:45 -04:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [0] dst) d4 mem))))
2018-04-11 22:47:24 +01:00
// Prefer to Zero and Store than to Move.
(Move {t1} [n] dst p1
mem:(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Zero {t3} [n] p3 _)))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Zero {t1} [n] dst mem))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Zero {t4} [n] p4 _))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Zero {t1} [n] dst mem)))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Zero {t5} [n] p5 _)))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
&& n >= o4 + t4.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Zero {t1} [n] dst mem))))
(Move {t1} [n] dst p1
mem:(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Store {t5} (OffPtr <tt5> [o5] p5) d4
(Zero {t6} [n] p6 _))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
&& t6.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
&& n >= o4 + t4.Size()
&& n >= o5 + t5.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [o5] dst) d4
(Zero {t1} [n] dst mem)))))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} op2:(OffPtr <tt2> [o2] p2) d1
(Zero {t3} [n] p3 _))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Zero {t1} [n] dst mem))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Zero {t4} [n] p4 _)))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Zero {t1} [n] dst mem)))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Zero {t5} [n] p5 _))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
&& n >= o4 + t4.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Zero {t1} [n] dst mem))))
(Move {t1} [n] dst p1
mem:(VarDef
(Store {t2} (OffPtr <tt2> [o2] p2) d1
(Store {t3} (OffPtr <tt3> [o3] p3) d2
(Store {t4} (OffPtr <tt4> [o4] p4) d3
(Store {t5} (OffPtr <tt5> [o5] p5) d4
(Zero {t6} [n] p6 _)))))))
&& isSamePtr(p1, p2) && isSamePtr(p2, p3) && isSamePtr(p3, p4) && isSamePtr(p4, p5) && isSamePtr(p5, p6)
2020-04-23 23:08:59 -07:00
&& t2.Alignment() <= t1.Alignment()
&& t3.Alignment() <= t1.Alignment()
&& t4.Alignment() <= t1.Alignment()
&& t5.Alignment() <= t1.Alignment()
&& t6.Alignment() <= t1.Alignment()
2018-04-11 22:47:24 +01:00
&& registerizable(b, t2)
&& registerizable(b, t3)
&& registerizable(b, t4)
&& registerizable(b, t5)
2020-04-23 23:08:59 -07:00
&& n >= o2 + t2.Size()
&& n >= o3 + t3.Size()
&& n >= o4 + t4.Size()
&& n >= o5 + t5.Size()
=> (Store {t2} (OffPtr <tt2> [o2] dst) d1
2018-04-11 22:47:24 +01:00
(Store {t3} (OffPtr <tt3> [o3] dst) d2
(Store {t4} (OffPtr <tt4> [o4] dst) d3
(Store {t5} (OffPtr <tt5> [o5] dst) d4
(Zero {t1} [n] dst mem)))))
2018-06-27 11:40:24 -05:00
2021-02-04 16:42:35 -05:00
(SelectN [0] call:(StaticLECall {sym} a x)) && needRaceCleanup(sym, call) && clobber(call) => x
(SelectN [0] call:(StaticLECall {sym} x)) && needRaceCleanup(sym, call) && clobber(call) => x
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
2022-10-27 08:28:06 -07:00
// When rewriting append to growslice, we use as the the new length the result of
// growslice so that we don't have to spill/restore the new length around the growslice call.
// The exception here is that if the new length is a constant, avoiding spilling it
// is pointless and its constantness is sometimes useful for subsequent optimizations.
// See issue 56440.
2022-10-28 14:14:24 -07:00
// Note there are 2 rules here, one for the pre-decomposed []T result and one for
// the post-decomposed (*T,int,int) result. (The latter is generated after call expansion.)
2022-10-27 08:28:06 -07:00
(SliceLen (SelectN [0] (StaticLECall {sym} _ newLen:(Const(64|32)) _ _ _ _))) && isSameCall(sym, "runtime.growslice") => newLen
2022-10-28 14:14:24 -07:00
(SelectN [1] (StaticCall {sym} _ newLen:(Const(64|32)) _ _ _ _)) && v.Type.IsInteger() && isSameCall(sym, "runtime.growslice") => newLen
2022-10-27 08:28:06 -07:00
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
// Collapse moving A -> B -> C into just A -> C.
// Later passes (deadstore, elim unread auto) will remove the A -> B move, if possible.
// This happens most commonly when B is an autotmp inserted earlier
// during compilation to ensure correctness.
2018-10-28 12:01:11 -07:00
// Take care that overlapping moves are preserved.
// Restrict this optimization to the stack, to avoid duplicating loads from the heap;
// see CL 145208 for discussion.
(Move {t1} [s] dst tmp1 midmem:(Move {t2} [s] tmp2 src _))
2020-04-23 22:41:18 -07:00
&& t1.Compare(t2) == types.CMPeq
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
&& isSamePtr(tmp1, tmp2)
2021-01-07 14:57:53 -08:00
&& isStackPtr(src) && !isVolatile(src)
2018-10-28 12:01:11 -07:00
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
2020-04-23 22:41:18 -07:00
=> (Move {t1} [s] dst src midmem)
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
2018-10-20 19:59:27 -07:00
// Same, but for large types that require VarDefs.
(Move {t1} [s] dst tmp1 midmem:(VarDef (Move {t2} [s] tmp2 src _)))
2020-04-23 22:41:18 -07:00
&& t1.Compare(t2) == types.CMPeq
2018-10-20 19:59:27 -07:00
&& isSamePtr(tmp1, tmp2)
2021-01-07 14:57:53 -08:00
&& isStackPtr(src) && !isVolatile(src)
2018-10-20 19:59:27 -07:00
&& disjoint(src, s, tmp2, s)
&& (disjoint(src, s, dst, s) || isInlinableMemmove(dst, src, s, config))
2020-04-23 22:41:18 -07:00
=> (Move {t1} [s] dst src midmem)
2018-10-20 19:59:27 -07:00
2020-04-23 15:20:56 -07:00
// Don't zero the same bits twice.
(Zero {t} [s] dst1 zero:(Zero {t} [s] dst2 _)) && isSamePtr(dst1, dst2) => zero
(Zero {t} [s] dst1 vardef:(VarDef (Zero {t} [s] dst2 _))) && isSamePtr(dst1, dst2) => vardef
cmd/compile: move argument stack construction to SSA generation
The goal of this change is to move work from walk to SSA,
and simplify things along the way.
This is hard to accomplish cleanly with small incremental changes,
so this large commit message aims to provide a roadmap to the diff.
High level description:
Prior to this change, walk was responsible for constructing (most of) the stack for function calls.
ascompatte gathered variadic arguments into a slice.
It also rewrote n.List from a list of arguments to a list of assignments to stack slots.
ascompatte was called multiple times to handle the receiver in a method call.
reorder1 then introduced temporaries into n.List as needed to avoid smashing the stack.
adjustargs then made extra stack space for go/defer args as needed.
Node to SSA construction evaluated all the statements in n.List,
and issued the function call, assuming that the stack was correctly constructed.
Intrinsic calls had to dig around inside n.List to extract the arguments,
since intrinsics don't use the stack to make function calls.
This change moves stack construction to the SSA construction phase.
ascompatte, now called walkParams, does all the work that ascompatte and reorder1 did.
It handles variadic arguments, inserts the method receiver if needed, and allocates temporaries.
It does not, however, make any assignments to stack slots.
Instead, it moves the function arguments to n.Rlist, leaving assignments to temporaries in n.List.
(It would be better to use Ninit instead of List; future work.)
During SSA construction, after doing all the temporary assignments in n.List,
the function arguments are assigned to stack slots by
constructing the appropriate SSA Value, using (*state).storeArg.
SSA construction also now handles adjustments for go/defer args.
This change also simplifies intrinsic calls, since we no longer need to undo walk's work.
Along the way, we simplify nodarg by pushing the fp==1 case to its callers, where it fits nicely.
Generated code differences:
There were a few optimizations applied along the way, the old way.
f(g()) was rewritten to do a block copy of function results to function arguments.
And reorder1 avoided introducing the final "save the stack" temporary in n.List.
The f(g()) block copy optimization never actually triggered; the order pass rewrote away g(), so that has been removed.
SSA optimizations mostly obviated the need for reorder1's optimization of avoiding the final temporary.
The exception was when the temporary's type was not SSA-able;
in that case, we got a Move into an autotmp and then an immediate Move onto the stack,
with the autotmp never read or used again.
This change introduces a new rewrite rule to detect such pointless double Moves
and collapse them into a single Move.
This is actually more powerful than the original optimization,
since the original optimization relied on the imprecise Node.HasCall calculation.
The other significant difference in the generated code is that the stack is now constructed
completely in SP-offset order. Prior to this change, the stack was constructed somewhat
haphazardly: first the final argument that Node.HasCall deemed to require a temporary,
then other arguments, then the method receiver, then the defer/go args.
SP-offset is probably a good default order. See future work.
There are a few minor object file size changes as a result of this change.
I investigated some regressions in early versions of this change.
One regression (in archive/tar) was the addition of a single CMPQ instruction,
which would be eliminated were this TODO from flagalloc to be done:
// TODO: Remove original instructions if they are never used.
One regression (in text/template) was an ADDQconstmodify that is now
a regular MOVQLoad+ADDQconst+MOVQStore, due to an unlucky change
in the order in which arguments are written. The argument change
order can also now be luckier, so this appears to be a wash.
All in all, though there will be minor winners and losers,
this change appears to be performance neutral.
Future work:
Move loading the result of function calls to SSA construction; eliminate OINDREGSP.
Consider pushing stack construction deeper into SSA world, perhaps in an arch-specific pass.
Among other benefits, this would make it easier to transition to a new calling convention.
This would require rethinking the handling of stack conflicts and is non-trivial.
Figure out some clean way to indicate that stack construction Stores/Moves
do not alias each other, so that subsequent passes may do things like
CSE+tighten shared stack setup, do DSE using non-first Stores, etc.
This would allow us to eliminate the minor text/template regression.
Possibly make assignments to stack slots not treated as statements by DWARF.
Compiler benchmarks:
name old time/op new time/op delta
Template 182ms ± 2% 179ms ± 2% -1.69% (p=0.000 n=47+48)
Unicode 86.3ms ± 5% 85.1ms ± 4% -1.36% (p=0.001 n=50+50)
GoTypes 646ms ± 1% 642ms ± 1% -0.63% (p=0.000 n=49+48)
Compiler 2.89s ± 1% 2.86s ± 2% -1.36% (p=0.000 n=48+50)
SSA 8.47s ± 1% 8.37s ± 2% -1.22% (p=0.000 n=47+50)
Flate 122ms ± 2% 121ms ± 2% -0.66% (p=0.000 n=47+45)
GoParser 147ms ± 2% 146ms ± 2% -0.53% (p=0.006 n=46+49)
Reflect 406ms ± 2% 403ms ± 2% -0.76% (p=0.000 n=48+43)
Tar 162ms ± 3% 162ms ± 4% ~ (p=0.191 n=46+50)
XML 223ms ± 2% 222ms ± 2% -0.37% (p=0.031 n=45+49)
[Geo mean] 382ms 378ms -0.89%
name old user-time/op new user-time/op delta
Template 219ms ± 3% 216ms ± 3% -1.56% (p=0.000 n=50+48)
Unicode 109ms ± 6% 109ms ± 5% ~ (p=0.190 n=50+49)
GoTypes 836ms ± 2% 828ms ± 2% -0.96% (p=0.000 n=49+48)
Compiler 3.87s ± 2% 3.80s ± 1% -1.81% (p=0.000 n=49+46)
SSA 12.0s ± 1% 11.8s ± 1% -2.01% (p=0.000 n=48+50)
Flate 142ms ± 3% 141ms ± 3% -0.85% (p=0.003 n=50+48)
GoParser 178ms ± 4% 175ms ± 4% -1.66% (p=0.000 n=48+46)
Reflect 520ms ± 2% 512ms ± 2% -1.44% (p=0.000 n=45+48)
Tar 200ms ± 3% 198ms ± 4% -0.61% (p=0.037 n=47+50)
XML 277ms ± 3% 275ms ± 3% -0.85% (p=0.000 n=49+48)
[Geo mean] 482ms 476ms -1.23%
name old alloc/op new alloc/op delta
Template 36.1MB ± 0% 35.3MB ± 0% -2.18% (p=0.008 n=5+5)
Unicode 29.8MB ± 0% 29.3MB ± 0% -1.58% (p=0.008 n=5+5)
GoTypes 125MB ± 0% 123MB ± 0% -2.13% (p=0.008 n=5+5)
Compiler 531MB ± 0% 513MB ± 0% -3.40% (p=0.008 n=5+5)
SSA 2.00GB ± 0% 1.93GB ± 0% -3.34% (p=0.008 n=5+5)
Flate 24.5MB ± 0% 24.3MB ± 0% -1.18% (p=0.008 n=5+5)
GoParser 29.4MB ± 0% 28.7MB ± 0% -2.34% (p=0.008 n=5+5)
Reflect 87.1MB ± 0% 86.0MB ± 0% -1.33% (p=0.008 n=5+5)
Tar 35.3MB ± 0% 34.8MB ± 0% -1.44% (p=0.008 n=5+5)
XML 47.9MB ± 0% 47.1MB ± 0% -1.86% (p=0.008 n=5+5)
[Geo mean] 82.8MB 81.1MB -2.08%
name old allocs/op new allocs/op delta
Template 352k ± 0% 347k ± 0% -1.32% (p=0.008 n=5+5)
Unicode 342k ± 0% 339k ± 0% -0.66% (p=0.008 n=5+5)
GoTypes 1.29M ± 0% 1.27M ± 0% -1.30% (p=0.008 n=5+5)
Compiler 4.98M ± 0% 4.87M ± 0% -2.14% (p=0.008 n=5+5)
SSA 15.7M ± 0% 15.2M ± 0% -2.86% (p=0.008 n=5+5)
Flate 233k ± 0% 231k ± 0% -0.83% (p=0.008 n=5+5)
GoParser 296k ± 0% 291k ± 0% -1.54% (p=0.016 n=5+4)
Reflect 1.05M ± 0% 1.04M ± 0% -0.65% (p=0.008 n=5+5)
Tar 343k ± 0% 339k ± 0% -0.97% (p=0.008 n=5+5)
XML 432k ± 0% 426k ± 0% -1.19% (p=0.008 n=5+5)
[Geo mean] 815k 804k -1.35%
name old object-bytes new object-bytes delta
Template 505kB ± 0% 505kB ± 0% -0.01% (p=0.008 n=5+5)
Unicode 224kB ± 0% 224kB ± 0% ~ (all equal)
GoTypes 1.82MB ± 0% 1.83MB ± 0% +0.06% (p=0.008 n=5+5)
Flate 324kB ± 0% 324kB ± 0% +0.00% (p=0.008 n=5+5)
GoParser 402kB ± 0% 402kB ± 0% +0.04% (p=0.008 n=5+5)
Reflect 1.39MB ± 0% 1.39MB ± 0% -0.01% (p=0.008 n=5+5)
Tar 449kB ± 0% 449kB ± 0% -0.02% (p=0.008 n=5+5)
XML 598kB ± 0% 597kB ± 0% -0.05% (p=0.008 n=5+5)
Change-Id: Ifc9d5c1bd01f90171414b8fb18ffe2290d271143
Reviewed-on: https://go-review.googlesource.com/c/114797
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
2018-05-06 12:58:53 -07:00
// Elide self-moves. This only happens rarely (e.g test/fixedbugs/bug277.go).
// However, this rule is needed to prevent the previous rule from looping forever in such cases.
2020-04-23 23:08:59 -07:00
(Move dst src mem) && isSamePtr(dst, src) => mem
2022-08-03 22:58:30 -07:00
// Constant rotate detection.
((Add64|Or64|Xor64) (Lsh64x64 x z:(Const64 <t> [c])) (Rsh64Ux64 x (Const64 [d]))) && c < 64 && d == 64-c && canRotate(config, 64) => (RotateLeft64 x z)
((Add32|Or32|Xor32) (Lsh32x64 x z:(Const64 <t> [c])) (Rsh32Ux64 x (Const64 [d]))) && c < 32 && d == 32-c && canRotate(config, 32) => (RotateLeft32 x z)
((Add16|Or16|Xor16) (Lsh16x64 x z:(Const64 <t> [c])) (Rsh16Ux64 x (Const64 [d]))) && c < 16 && d == 16-c && canRotate(config, 16) => (RotateLeft16 x z)
((Add8|Or8|Xor8) (Lsh8x64 x z:(Const64 <t> [c])) (Rsh8Ux64 x (Const64 [d]))) && c < 8 && d == 8-c && canRotate(config, 8) => (RotateLeft8 x z)
// Non-constant rotate detection.
// We use shiftIsBounded to make sure that neither of the shifts are >64.
// Note: these rules are subtle when the shift amounts are 0/64, as Go shifts
// are different from most native shifts. But it works out.
((Add64|Or64|Xor64) left:(Lsh64x64 x y) right:(Rsh64Ux64 x (Sub64 (Const64 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y)
((Add64|Or64|Xor64) left:(Lsh64x32 x y) right:(Rsh64Ux32 x (Sub32 (Const32 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y)
((Add64|Or64|Xor64) left:(Lsh64x16 x y) right:(Rsh64Ux16 x (Sub16 (Const16 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y)
((Add64|Or64|Xor64) left:(Lsh64x8 x y) right:(Rsh64Ux8 x (Sub8 (Const8 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x y)
((Add64|Or64|Xor64) right:(Rsh64Ux64 x y) left:(Lsh64x64 x z:(Sub64 (Const64 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z)
((Add64|Or64|Xor64) right:(Rsh64Ux32 x y) left:(Lsh64x32 x z:(Sub32 (Const32 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z)
((Add64|Or64|Xor64) right:(Rsh64Ux16 x y) left:(Lsh64x16 x z:(Sub16 (Const16 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z)
((Add64|Or64|Xor64) right:(Rsh64Ux8 x y) left:(Lsh64x8 x z:(Sub8 (Const8 [64]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 64) => (RotateLeft64 x z)
((Add32|Or32|Xor32) left:(Lsh32x64 x y) right:(Rsh32Ux64 x (Sub64 (Const64 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y)
((Add32|Or32|Xor32) left:(Lsh32x32 x y) right:(Rsh32Ux32 x (Sub32 (Const32 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y)
((Add32|Or32|Xor32) left:(Lsh32x16 x y) right:(Rsh32Ux16 x (Sub16 (Const16 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y)
((Add32|Or32|Xor32) left:(Lsh32x8 x y) right:(Rsh32Ux8 x (Sub8 (Const8 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x y)
((Add32|Or32|Xor32) right:(Rsh32Ux64 x y) left:(Lsh32x64 x z:(Sub64 (Const64 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z)
((Add32|Or32|Xor32) right:(Rsh32Ux32 x y) left:(Lsh32x32 x z:(Sub32 (Const32 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z)
((Add32|Or32|Xor32) right:(Rsh32Ux16 x y) left:(Lsh32x16 x z:(Sub16 (Const16 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z)
((Add32|Or32|Xor32) right:(Rsh32Ux8 x y) left:(Lsh32x8 x z:(Sub8 (Const8 [32]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 32) => (RotateLeft32 x z)
((Add16|Or16|Xor16) left:(Lsh16x64 x y) right:(Rsh16Ux64 x (Sub64 (Const64 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y)
((Add16|Or16|Xor16) left:(Lsh16x32 x y) right:(Rsh16Ux32 x (Sub32 (Const32 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y)
((Add16|Or16|Xor16) left:(Lsh16x16 x y) right:(Rsh16Ux16 x (Sub16 (Const16 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y)
((Add16|Or16|Xor16) left:(Lsh16x8 x y) right:(Rsh16Ux8 x (Sub8 (Const8 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x y)
((Add16|Or16|Xor16) right:(Rsh16Ux64 x y) left:(Lsh16x64 x z:(Sub64 (Const64 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z)
((Add16|Or16|Xor16) right:(Rsh16Ux32 x y) left:(Lsh16x32 x z:(Sub32 (Const32 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z)
((Add16|Or16|Xor16) right:(Rsh16Ux16 x y) left:(Lsh16x16 x z:(Sub16 (Const16 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z)
((Add16|Or16|Xor16) right:(Rsh16Ux8 x y) left:(Lsh16x8 x z:(Sub8 (Const8 [16]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 16) => (RotateLeft16 x z)
((Add8|Or8|Xor8) left:(Lsh8x64 x y) right:(Rsh8Ux64 x (Sub64 (Const64 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y)
((Add8|Or8|Xor8) left:(Lsh8x32 x y) right:(Rsh8Ux32 x (Sub32 (Const32 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y)
((Add8|Or8|Xor8) left:(Lsh8x16 x y) right:(Rsh8Ux16 x (Sub16 (Const16 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y)
((Add8|Or8|Xor8) left:(Lsh8x8 x y) right:(Rsh8Ux8 x (Sub8 (Const8 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x y)
((Add8|Or8|Xor8) right:(Rsh8Ux64 x y) left:(Lsh8x64 x z:(Sub64 (Const64 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z)
((Add8|Or8|Xor8) right:(Rsh8Ux32 x y) left:(Lsh8x32 x z:(Sub32 (Const32 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z)
((Add8|Or8|Xor8) right:(Rsh8Ux16 x y) left:(Lsh8x16 x z:(Sub16 (Const16 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z)
((Add8|Or8|Xor8) right:(Rsh8Ux8 x y) left:(Lsh8x8 x z:(Sub8 (Const8 [8]) y))) && (shiftIsBounded(left) || shiftIsBounded(right)) && canRotate(config, 8) => (RotateLeft8 x z)
// Rotating by y&c, with c a mask that doesn't change the bottom bits, is the same as rotating by y.
(RotateLeft64 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&63 == 63 => (RotateLeft64 x y)
(RotateLeft32 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&31 == 31 => (RotateLeft32 x y)
(RotateLeft16 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&15 == 15 => (RotateLeft16 x y)
(RotateLeft8 x (And(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&7 == 7 => (RotateLeft8 x y)
// Rotating by -(y&c), with c a mask that doesn't change the bottom bits, is the same as rotating by -y.
(RotateLeft64 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&63 == 63 => (RotateLeft64 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft32 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&31 == 31 => (RotateLeft32 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft16 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&15 == 15 => (RotateLeft16 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft8 x (Neg(64|32|16|8) (And(64|32|16|8) y (Const(64|32|16|8) [c])))) && c&7 == 7 => (RotateLeft8 x (Neg(64|32|16|8) <y.Type> y))
// Rotating by y+c, with c a multiple of the value width, is the same as rotating by y.
(RotateLeft64 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&63 == 0 => (RotateLeft64 x y)
(RotateLeft32 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&31 == 0 => (RotateLeft32 x y)
(RotateLeft16 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&15 == 0 => (RotateLeft16 x y)
(RotateLeft8 x (Add(64|32|16|8) y (Const(64|32|16|8) [c]))) && c&7 == 0 => (RotateLeft8 x y)
// Rotating by c-y, with c a multiple of the value width, is the same as rotating by -y.
(RotateLeft64 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&63 == 0 => (RotateLeft64 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft32 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&31 == 0 => (RotateLeft32 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft16 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&15 == 0 => (RotateLeft16 x (Neg(64|32|16|8) <y.Type> y))
(RotateLeft8 x (Sub(64|32|16|8) (Const(64|32|16|8) [c]) y)) && c&7 == 0 => (RotateLeft8 x (Neg(64|32|16|8) <y.Type> y))
// Ensure we don't do Const64 rotates in a 32-bit system.
(RotateLeft64 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft64 x (Const32 <t> [int32(c)]))
(RotateLeft32 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft32 x (Const32 <t> [int32(c)]))
(RotateLeft16 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft16 x (Const32 <t> [int32(c)]))
(RotateLeft8 x (Const64 <t> [c])) && config.PtrSize == 4 => (RotateLeft8 x (Const32 <t> [int32(c)]))
2022-08-18 14:13:33 -07:00
// Rotating by c, then by d, is the same as rotating by c+d.
// We're trading a rotate for an add, which seems generally a good choice. It is especially good when c and d are constants.
// This rule is a bit tricky as c and d might be different widths. We handle only cases where they are the same width.
(RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 8 && d.Type.Size() == 8 => (RotateLeft(64|32|16|8) x (Add64 <c.Type> c d))
(RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 4 && d.Type.Size() == 4 => (RotateLeft(64|32|16|8) x (Add32 <c.Type> c d))
(RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 2 && d.Type.Size() == 2 => (RotateLeft(64|32|16|8) x (Add16 <c.Type> c d))
(RotateLeft(64|32|16|8) (RotateLeft(64|32|16|8) x c) d) && c.Type.Size() == 1 && d.Type.Size() == 1 => (RotateLeft(64|32|16|8) x (Add8 <c.Type> c d))