mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
Performance is kind of hard to exactly quantify. One big difference between jump tables and the old binary search scheme is that there's only 1 branch statement instead of O(n) of them. That can be both a blessing and a curse, and can make evaluating jump tables very hard to do. The single branch can become a choke point for the hardware branch predictor. A branch table jump must fit all of its state in a single branch predictor entry (technically, a branch target predictor entry). With binary search that predictor state can be spread among lots of entries. In cases where the case selection is repetitive and thus predictable, binary search can perform better. The big win for a jump table is that it doesn't consume so much of the branch predictor's resources. But that benefit is essentially never observed in microbenchmarks, because the branch predictor can easily keep state for all the binary search branches in a microbenchmark. So that benefit is really hard to measure. So predictable switch microbenchmarks are ~useless - they will almost always favor the binary search scheme. Fully unpredictable switch microbenchmarks are better, as they aren't lying to us quite so much. In a perfectly unpredictable situation, a jump table will expect to incur 1-1/N branch mispredicts, where a binary search would incur lg(N)/2 of them. That makes the crossover point at about N=4. But of course switches in real programs are seldom fully unpredictable, so we'll use a higher crossover point. Beyond the branch predictor, jump tables tend to execute more instructions per switch but have no additional instructions per case, which also argues for a larger crossover. As far as code size goes, with this CL cmd/go has a slightly smaller code segment and a slightly larger overall size (from the jump tables themselves which live in the data segment). This is a case where some FDO (feedback-directed optimization) would be really nice to have. 
#28262

Some large-program benchmarks might help make the case for this CL, especially if we can turn on branch mispredict counters so we can see how much using jump tables can free up branch prediction resources that can be gainfully used elsewhere in the program.

name                   old time/op  new time/op  delta
Switch8Predictable     1.89ns ± 2%  1.27ns ± 3%  -32.58%  (p=0.000 n=9+10)
Switch8Unpredictable   9.33ns ± 1%  7.50ns ± 1%  -19.60%  (p=0.000 n=10+9)
Switch32Predictable    2.20ns ± 2%  1.64ns ± 1%  -25.39%  (p=0.000 n=10+9)
Switch32Unpredictable  10.0ns ± 2%   7.6ns ± 2%  -24.04%  (p=0.000 n=10+10)

Fixes #5496
Update #34381

Change-Id: I3ff56011d02be53f605ca5fd3fb96b905517c34f
Reviewed-on: https://go-review.googlesource.com/c/go/+/357330
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
126 lines
3.2 KiB
Go
126 lines
3.2 KiB
Go
// Copyright 2015 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package ssa
|
|
|
|
import (
|
|
"testing"
|
|
|
|
"cmd/compile/internal/ir"
|
|
"cmd/compile/internal/typecheck"
|
|
"cmd/compile/internal/types"
|
|
"cmd/internal/obj"
|
|
"cmd/internal/obj/arm64"
|
|
"cmd/internal/obj/s390x"
|
|
"cmd/internal/obj/x86"
|
|
"cmd/internal/src"
|
|
)
|
|
|
|
var CheckFunc = checkFunc
|
|
var Opt = opt
|
|
var Deadcode = deadcode
|
|
var Copyelim = copyelim
|
|
|
|
var testCtxts = map[string]*obj.Link{
|
|
"amd64": obj.Linknew(&x86.Linkamd64),
|
|
"s390x": obj.Linknew(&s390x.Links390x),
|
|
"arm64": obj.Linknew(&arm64.Linkarm64),
|
|
}
|
|
|
|
// testConfig returns the Conf produced by testConfigArch for "amd64".
func testConfig(tb testing.TB) *Conf {
	return testConfigArch(tb, "amd64")
}
|
|
// testConfigS390X returns the Conf produced by testConfigArch for "s390x".
func testConfigS390X(tb testing.TB) *Conf {
	return testConfigArch(tb, "s390x")
}
|
|
// testConfigARM64 returns the Conf produced by testConfigArch for "arm64".
func testConfigARM64(tb testing.TB) *Conf {
	return testConfigArch(tb, "arm64")
}
|
|
|
|
func testConfigArch(tb testing.TB, arch string) *Conf {
|
|
ctxt, ok := testCtxts[arch]
|
|
if !ok {
|
|
tb.Fatalf("unknown arch %s", arch)
|
|
}
|
|
if ctxt.Arch.PtrSize != 8 {
|
|
tb.Fatal("testTypes is 64-bit only")
|
|
}
|
|
c := &Conf{
|
|
config: NewConfig(arch, testTypes, ctxt, true, false),
|
|
tb: tb,
|
|
}
|
|
return c
|
|
}
|
|
|
|
type Conf struct {
|
|
config *Config
|
|
tb testing.TB
|
|
fe Frontend
|
|
}
|
|
|
|
func (c *Conf) Frontend() Frontend {
|
|
if c.fe == nil {
|
|
c.fe = TestFrontend{t: c.tb, ctxt: c.config.ctxt}
|
|
}
|
|
return c.fe
|
|
}
|
|
|
|
// TestFrontend is a test-only frontend.
|
|
// It assumes 64 bit integers and pointers.
|
|
type TestFrontend struct {
|
|
t testing.TB
|
|
ctxt *obj.Link
|
|
}
|
|
|
|
func (TestFrontend) StringData(s string) *obj.LSym {
|
|
return nil
|
|
}
|
|
func (TestFrontend) Auto(pos src.XPos, t *types.Type) *ir.Name {
|
|
n := ir.NewNameAt(pos, &types.Sym{Name: "aFakeAuto"})
|
|
n.Class = ir.PAUTO
|
|
return n
|
|
}
|
|
func (d TestFrontend) SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot {
|
|
return LocalSlot{N: parent.N, Type: t, Off: offset}
|
|
}
|
|
func (TestFrontend) Line(_ src.XPos) string {
|
|
return "unknown.go:0"
|
|
}
|
|
func (TestFrontend) AllocFrame(f *Func) {
|
|
}
|
|
func (d TestFrontend) Syslook(s string) *obj.LSym {
|
|
return d.ctxt.Lookup(s)
|
|
}
|
|
func (TestFrontend) UseWriteBarrier() bool {
|
|
return true // only writebarrier_test cares
|
|
}
|
|
func (TestFrontend) SetWBPos(pos src.XPos) {
|
|
}
|
|
|
|
// Logf forwards msg and args to the Logf method of d's testing.TB.
func (d TestFrontend) Logf(msg string, args ...interface{}) {
	d.t.Logf(msg, args...)
}
|
|
// Log always reports true.
func (d TestFrontend) Log() bool {
	return true
}
|
|
|
|
// Fatalf forwards msg and args to the Fatalf method of d's testing.TB.
// The source position is ignored.
func (d TestFrontend) Fatalf(_ src.XPos, msg string, args ...interface{}) {
	d.t.Fatalf(msg, args...)
}
|
|
// Warnl records the warning through the Logf method of d's testing.TB.
// The source position is ignored.
func (d TestFrontend) Warnl(_ src.XPos, msg string, args ...interface{}) {
	d.t.Logf(msg, args...)
}
|
|
// Debug_checknil always reports false.
func (d TestFrontend) Debug_checknil() bool {
	return false
}
|
|
|
|
func (d TestFrontend) MyImportPath() string {
|
|
return "my/import/path"
|
|
}
|
|
func (d TestFrontend) LSym() string {
|
|
return "my/import/path.function"
|
|
}
|
|
|
|
// testTypes is the Types value that testConfigArch hands to NewConfig.
// The package's init function calls SetTypPtrs on it.
var testTypes Types
|
|
|
|
func init() {
|
|
// TODO(mdempsky): Push into types.InitUniverse or typecheck.InitUniverse.
|
|
types.PtrSize = 8
|
|
types.RegSize = 8
|
|
types.MaxWidth = 1 << 50
|
|
|
|
typecheck.InitUniverse()
|
|
testTypes.SetTypPtrs()
|
|
}
|
|
|
|
// DerefItab ignores its arguments and always returns nil.
func (d TestFrontend) DerefItab(sym *obj.LSym, off int64) *obj.LSym {
	return nil
}
|
|
|
|
func (d TestFrontend) CanSSA(t *types.Type) bool {
|
|
// There are no un-SSAable types in test land.
|
|
return true
|
|
}
|