// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package ssa

import (
	"cmd/internal/obj"
	"fmt"
	"math"
	"os"
	"path/filepath"
)
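
// applyRewrite repeatedly applies the block rewrite function rb and the
// value rewrite function rv to f, along with copy and phi elimination,
// until no further changes occur. It then removes any values that were
// marked invalid along the way.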
func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter) {
	// repeat rewrites until we find no more rewrites
	for {
		change := false
		for _, b := range f.Blocks {
			if b.Control != nil && b.Control.Op == OpCopy {
				for b.Control.Op == OpCopy {
					b.SetControl(b.Control.Args[0])
				}
			}
			if rb(b) {
				change = true
			}
			for _, v := range b.Values {
				change = phielimValue(v) || change

				// Eliminate copy inputs.
				// If any copy input becomes unused, mark it
				// as invalid and discard its argument. Repeat
				// recursively on the discarded argument.
				// This phase helps remove phantom "dead copy" uses
				// of a value so that a x.Uses==1 rule condition
				// fires reliably.
				for i, a := range v.Args {
					if a.Op != OpCopy {
						continue
					}
					v.SetArg(i, copySource(a))
					change = true
					for a.Uses == 0 {
						b := a.Args[0]
						a.reset(OpInvalid)
						a = b
					}
				}

				// apply rewrite function
				if rv(v) {
					change = true
				}
			}
		}
		if !change {
			break
		}
	}
	// remove clobbered values
	for _, b := range f.Blocks {
		j := 0
		for i, v := range b.Values {
			if v.Op == OpInvalid {
				f.freeValue(v)
				continue
			}
			if i != j {
				b.Values[j] = v
			}
			j++
		}
		if j != len(b.Values) {
			tail := b.Values[j:]
			for j := range tail {
				tail[j] = nil
			}
			b.Values = b.Values[:j]
		}
	}
}

// Common functions called from rewriting rules

func is64BitFloat(t Type) bool {
	return t.Size() == 8 && t.IsFloat()
}

func is32BitFloat(t Type) bool {
	return t.Size() == 4 && t.IsFloat()
}

func is64BitInt(t Type) bool {
	return t.Size() == 8 && t.IsInteger()
}

func is32BitInt(t Type) bool {
	return t.Size() == 4 && t.IsInteger()
}

func is16BitInt(t Type) bool {
	return t.Size() == 2 && t.IsInteger()
}

func is8BitInt(t Type) bool {
	return t.Size() == 1 && t.IsInteger()
}

func isPtr(t Type) bool {
	return t.IsPtrShaped()
}

func isSigned(t Type) bool {
	return t.IsSigned()
}

func typeSize(t Type) int64 {
	return t.Size()
}

// mergeSym merges two symbolic offsets. There is no real merging of
// offsets; we just pick the non-nil one.
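// Rewrite rules are expected to guard calls with canMergeSym, since
// merging two non-nil syms panics.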
func mergeSym(x, y interface{}) interface{} {
	if x == nil {
		return y
	}
	if y == nil {
		return x
	}
	panic(fmt.Sprintf("mergeSym with two non-nil syms %s %s", x, y))
}

func canMergeSym(x, y interface{}) bool {
	return x == nil || y == nil
}

// canMergeLoad reports whether the load can be merged into target without
// invalidating the schedule.
// It also checks that the other non-load argument x is something we
// are ok with clobbering (all our current load+op instructions clobber
// their input register).
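// For example, amd64 rewrite rules consult it when deciding whether a
// load (such as MOVQload) can be folded into an arithmetic op so the
// pair becomes a single load+op instruction.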
func canMergeLoad(target, load, x *Value) bool {
	if target.Block.ID != load.Block.ID {
		// If the load is in a different block do not merge it.
		return false
	}

	// We can't merge the load into the target if the load
	// has more than one use.
	if load.Uses != 1 {
		return false
	}

	// The register containing x is going to get clobbered.
	// Don't merge if we still need the value of x.
	// We don't have liveness information here, but we can
	// approximate x dying with:
	//  1) target is x's only use.
	//  2) target is not in a deeper loop than x.
	if x.Uses != 1 {
		return false
	}
	loopnest := x.Block.Func.loopnest()
	loopnest.calculateDepths()
	if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
		return false
	}

	mem := load.MemoryArg()

	// We need the load's memory arg to still be alive at target. That
	// can't be the case if one of target's args depends on a memory
	// state that is a successor of load's memory arg.
	//
	// For example, it would be invalid to merge load into target in
	// the following situation because newmem has killed oldmem
	// before target is reached:
	//     load = read ... oldmem
	//   newmem = write ... oldmem
	//     arg0 = read ... newmem
	//   target = add arg0 load
	//
	// If the argument comes from a different block then we can exclude
	// it immediately because it must dominate load (which is in the
	// same block as target).
	var args []*Value
	for _, a := range target.Args {
		if a != load && a.Block.ID == target.Block.ID {
			args = append(args, a)
		}
	}

	// memPreds contains memory states known to be predecessors of load's
	// memory state. It is lazily initialized.
	var memPreds map[*Value]bool
search:
	for i := 0; len(args) > 0; i++ {
		const limit = 100
		if i >= limit {
			// Give up if we have done a lot of iterations.
			return false
		}
		v := args[len(args)-1]
		args = args[:len(args)-1]
		if target.Block.ID != v.Block.ID {
			// Since target and load are in the same block
			// we can stop searching when we leave the block.
			continue search
		}
		if v.Op == OpPhi {
			// A Phi implies we have reached the top of the block.
			continue search
		}
		if v.Type.IsTuple() && v.Type.FieldType(1).IsMemory() {
			// We could handle this situation, but it is likely
			// to be very rare.
			return false
		}
		if v.Type.IsMemory() {
			if memPreds == nil {
				// Initialise a map containing memory states
				// known to be predecessors of load's memory
				// state.
				memPreds = make(map[*Value]bool)
				m := mem
				const limit = 50
				for i := 0; i < limit; i++ {
					if m.Op == OpPhi {
						break
					}
					if m.Block.ID != target.Block.ID {
						break
					}
					if !m.Type.IsMemory() {
						break
					}
					memPreds[m] = true
					if len(m.Args) == 0 {
						break
					}
					m = m.MemoryArg()
				}
			}

			// We can merge if v is a predecessor of mem.
			//
			// For example, we can merge load into target in the
			// following scenario:
			//      x = read ... v
			//    mem = write ... v
			//   load = read ... mem
			// target = add x load
			if memPreds[v] {
				continue search
			}
			return false
		}
		if len(v.Args) > 0 && v.Args[len(v.Args)-1] == mem {
			// If v takes mem as an input then we know mem
			// is valid at this point.
			continue search
		}
		for _, a := range v.Args {
			if target.Block.ID == a.Block.ID {
				args = append(args, a)
			}
		}
	}

	return true
}

// isArg returns whether s is an arg symbol.
func isArg(s interface{}) bool {
	_, ok := s.(*ArgSymbol)
	return ok
}

// isAuto returns whether s is an auto symbol.
func isAuto(s interface{}) bool {
	_, ok := s.(*AutoSymbol)
	return ok
}

func fitsARM64Offset(off, align int64, sym interface{}) bool {
	// Only a small offset (between -256 and 256) or an offset that is
	// a multiple of the data size can be encoded in the instructions.
	// Since this rewriting takes place before stack allocation, the
	// offset from SP is unknown, so don't do it for args and locals
	// with unaligned offsets.
	if !is32Bit(off) {
		return false
	}
	if align == 1 {
		return true
	}
	return !isArg(sym) && (off%align == 0 || off < 256 && off > -256 && !isAuto(sym))
}

// isSameSym returns whether sym is the same as the given named symbol.
func isSameSym(sym interface{}, name string) bool {
	s, ok := sym.(fmt.Stringer)
	return ok && s.String() == name
}

// nlz returns the number of leading zeros.
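// For example, nlz(1) == 63 and nlz(-1) == 0.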
func nlz(x int64) int64 {
	// log2(0) == -1, so nlz(0) == 64
	return 63 - log2(x)
}

// ntz returns the number of trailing zeros.
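// For example, ntz(8) == 3 and ntz(0) == 64.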
func ntz(x int64) int64 {
	return 64 - nlz(^x&(x-1))
}

// nlo returns the number of leading ones.
func nlo(x int64) int64 {
	return nlz(^x)
}

// nto returns the number of trailing ones.
func nto(x int64) int64 {
	return ntz(^x)
}

// log2 returns logarithm in base 2 of uint64(n), with log2(0) = -1.
// Rounds down.
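// For example, log2(1) == 0, log2(7) == 2, and log2(8) == 3.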
func log2(n int64) (l int64) {
	l = -1
	x := uint64(n)
	for ; x >= 0x8000; x >>= 16 {
		l += 16
	}
	if x >= 0x80 {
		x >>= 8
		l += 8
	}
	if x >= 0x8 {
		x >>= 4
		l += 4
	}
	if x >= 0x2 {
		x >>= 2
		l += 2
	}
	if x >= 0x1 {
		l++
	}
	return
}

// isPowerOfTwo reports whether n is a power of 2.
func isPowerOfTwo(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
	return n == int64(int32(n))
}

// is16Bit reports whether n can be represented as a signed 16 bit integer.
func is16Bit(n int64) bool {
	return n == int64(int16(n))
}

// isU16Bit reports whether n can be represented as an unsigned 16 bit integer.
func isU16Bit(n int64) bool {
	return n == int64(uint16(n))
}

// isU32Bit reports whether n can be represented as an unsigned 32 bit integer.
func isU32Bit(n int64) bool {
	return n == int64(uint32(n))
}

// is20Bit reports whether n can be represented as a signed 20 bit integer.
func is20Bit(n int64) bool {
	return -(1<<19) <= n && n < (1<<19)
}

// b2i translates a boolean value to 0 or 1 for assigning to auxInt.
func b2i(b bool) int64 {
	if b {
		return 1
	}
	return 0
}

// i2f is used in rules for converting from an AuxInt to a float.
func i2f(i int64) float64 {
	return math.Float64frombits(uint64(i))
}

// i2f32 is used in rules for converting from an AuxInt to a float32.
func i2f32(i int64) float32 {
	return float32(math.Float64frombits(uint64(i)))
}

// f2i is used in the rules for storing a float in AuxInt.
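// It is, in effect, the inverse of i2f: f2i(i2f(x)) == x for any bit
// pattern x, since both are plain bit conversions.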
func f2i(f float64) int64 {
	return int64(math.Float64bits(f))
}

// uaddOvf returns true if unsigned a+b would overflow.
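// For example, uaddOvf(-1, 1) is true, since the arguments are
// reinterpreted as unsigned values.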
func uaddOvf(a, b int64) bool {
	return uint64(a)+uint64(b) < uint64(a)
}

// de-virtualize an InterCall
// 'sym' is the symbol for the itab
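// When the itab is a known constant, a call through an interface, such
// as h.Write(buf) on an h returned by sha1.New(), can be rewritten as a
// static call to the concrete method (*sha1.digest in that example)
// instead of an indirect interface call.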
func devirt(v *Value, sym interface{}, offset int64) *obj.LSym {
	f := v.Block.Func
	ext, ok := sym.(*ExternSymbol)
	if !ok {
		return nil
	}
	lsym := f.fe.DerefItab(ext.Sym, offset)
	if f.pass.debug > 0 {
		if lsym != nil {
			f.Warnl(v.Pos, "de-virtualizing call")
		} else {
			f.Warnl(v.Pos, "couldn't de-virtualize call")
		}
	}
	return lsym
}

// isSamePtr reports whether p1 and p2 point to the same address.
func isSamePtr(p1, p2 *Value) bool {
	if p1 == p2 {
		return true
	}
	if p1.Op != p2.Op {
		return false
	}
	switch p1.Op {
	case OpOffPtr:
		return p1.AuxInt == p2.AuxInt && isSamePtr(p1.Args[0], p2.Args[0])
	case OpAddr:
		// OpAddr's 0th arg is either OpSP or OpSB, which means that it is uniquely identified by its Op.
		// Checking for value equality only works after [z]cse has run.
		return p1.Aux == p2.Aux && p1.Args[0].Op == p2.Args[0].Op
	case OpAddPtr:
		return p1.Args[1] == p2.Args[1] && isSamePtr(p1.Args[0], p2.Args[0])
	}
	return false
}

// moveSize returns the number of bytes an aligned MOV instruction moves.
func moveSize(align int64, c *Config) int64 {
	switch {
	case align%8 == 0 && c.IntSize == 8:
		return 8
	case align%4 == 0:
		return 4
	case align%2 == 0:
		return 2
	}
	return 1
}

// mergePoint finds a block among a's blocks which dominates b and is itself
// dominated by all of a's blocks. Returns nil if it can't find one.
// Might return nil even if one does exist.
func mergePoint(b *Block, a ...*Value) *Block {
	// Walk backward from b looking for one of the a's blocks.

	// Max distance
	d := 100

	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				goto found
			}
		}
		if len(b.Preds) > 1 {
			// Don't know which way to go back. Abort.
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
found:
	// At this point, r is the first block in a that we found by walking backwards.
	// If we return anything, r will be it.
	r := b

	// Keep going, counting the other a's that we find. They must all dominate r.
	na := 0
	for d > 0 {
		for _, x := range a {
			if b == x.Block {
				na++
			}
		}
		if na == len(a) {
			// Found all of a in a backwards walk. We can return r.
			return r
		}
		if len(b.Preds) > 1 {
			return nil
		}
		b = b.Preds[0].b
		d--
	}
	return nil // too far away
}

// clobber invalidates v. Returns true.
// clobber is used by rewrite rules to:
//   A) make sure v is really dead and never used again.
//   B) decrement use counts of v's args.
func clobber(v *Value) bool {
	v.reset(OpInvalid)
	// Note: leave v.Block intact. The Block field is used after clobber.
	return true
}

// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//     noteRule("note to self: rule of interest matched")
// and that message will print when the rule matches.
func noteRule(s string) bool {
	fmt.Println(s)
	return true
}

// warnRule generates compiler debug output with string s when
// cond is true and the rule is fired.
func warnRule(cond bool, v *Value, s string) bool {
	if cond {
		v.Block.Func.Warnl(v.Pos, s)
	}
	return true
}

// logRule logs the use of the rule s. This will only be enabled if
// rewrite rules were generated with the -log option, see gen/rulegen.go.
func logRule(s string) {
	if ruleFile == nil {
		// Open a log file to write log to. We open in append
		// mode because all.bash runs the compiler lots of times,
		// and we want the concatenation of all of those logs.
		// This means, of course, that users need to rm the old log
		// to get fresh data.
		// TODO: all.bash runs compilers in parallel. Need to synchronize logging somehow?
		w, err := os.OpenFile(filepath.Join(os.Getenv("GOROOT"), "src", "rulelog"),
			os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
		if err != nil {
			panic(err)
		}
		ruleFile = w
	}
	_, err := fmt.Fprintf(ruleFile, "rewrite %s\n", s)
	if err != nil {
		panic(err)
	}
}

var ruleFile *os.File
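
// min returns the minimum of x and y.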
func min(x, y int64) int64 {
	if x < y {
		return x
	}
	return y
}
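
// isConstZero reports whether v is a constant with value zero
// (including a nil pointer).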
func isConstZero(v *Value) bool {
	switch v.Op {
	case OpConstNil:
		return true
	case OpConst64, OpConst32, OpConst16, OpConst8, OpConstBool, OpConst32F, OpConst64F:
		return v.AuxInt == 0
	}
	return false
}

// reciprocalExact64 reports whether 1/c is exactly representable.
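// For example, reciprocalExact64(2) is true (1/2 == 0.5 exactly),
// while reciprocalExact64(3) is false.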
func reciprocalExact64(c float64) bool {
	b := math.Float64bits(c)
	man := b & (1<<52 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 52 & (1<<11 - 1)
	// exponent bias is 0x3ff. So taking the reciprocal of a number
	// changes the exponent to 0x7fe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0x7ff:
		return false // ±inf
	case 0x7fe:
		return false // exponent is not representable
	default:
		return true
	}
}

// reciprocalExact32 reports whether 1/c is exactly representable.
func reciprocalExact32(c float32) bool {
	b := math.Float32bits(c)
	man := b & (1<<23 - 1)
	if man != 0 {
		return false // not a power of 2, denormal, or NaN
	}
	exp := b >> 23 & (1<<8 - 1)
	// exponent bias is 0x7f. So taking the reciprocal of a number
	// changes the exponent to 0xfe-exp.
	switch exp {
	case 0:
		return false // ±0
	case 0xff:
		return false // ±inf
	case 0xfe:
		return false // exponent is not representable
	default:
		return true
	}
}