mirror of https://github.com/golang/go.git (synced 2025-12-08 06:10:04 +00:00)
cmd/compile: use MOVBQZX for OpAMD64LoweredHasCPUFeature
In the commit message of CL 212360, I wrote:

> This new intrinsic ... generates MOVB+TESTB+NE.
> (It is possible that MOVBQZX+TESTQ+NE would be better.)

I should have tested. MOVBQZX+TESTQ+NE does in fact appear to be better.

For the benchmark in #36196, on my machine:

name      old time/op  new time/op  delta
FMA-8     0.86ns ± 6%  0.70ns ± 5%  -18.79%  (p=0.000 n=98+97)
NonFMA-8  0.61ns ± 5%  0.60ns ± 4%   -0.74%  (p=0.001 n=100+97)

Interestingly, these are both considerably faster than the measurements I took a couple of months ago (1.4ns/2ns). It appears that CL 219131 (clearing VZEROUPPER in asyncPreempt) helped a lot. And FMA is now once again slower than NonFMA, although this change helps it regain some ground.

Updates #15808
Updates #36351
Updates #36196

Change-Id: I8a326289a963b1939aaa7eaa2fab2ec536467c7d
Reviewed-on: https://go-review.googlesource.com/c/go/+/227238
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent 64f19d7080
commit 7ee8467b27
4 changed files with 22 additions and 5 deletions
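For context, here is a minimal sketch of the kind of FMA-versus-non-FMA microbenchmark referenced in the message above. The exact benchmark attached to #36196 may differ; the names BenchmarkFMA, BenchmarkNonFMA, and sink are illustrative, and the code assumes it lives in a test file such as fma_test.go:

package fma

import (
	"math"
	"testing"
)

var sink float64

// BenchmarkFMA exercises math.FMA, which the compiler intrinsifies on
// amd64 behind the CPU-feature check this CL tunes.
func BenchmarkFMA(b *testing.B) {
	x, y, z := 1.1, 2.2, 3.3
	for i := 0; i < b.N; i++ {
		z = math.FMA(x, y, z)
	}
	sink = z
}

// BenchmarkNonFMA uses the separately rounded x*y + z for comparison.
func BenchmarkNonFMA(b *testing.B) {
	x, y, z := 1.1, 2.2, 3.3
	for i := 0; i < b.N; i++ {
		z = x*y + z
	}
	sink = z
}

Running go test -bench . -count 10 against the old and new compiler and comparing the results with benchstat produces output in the same format as the table above.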
src/cmd/compile/internal/amd64/ssa.go

@@ -903,7 +903,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p.From.Reg = v.Args[0].Reg()
 		gc.AddrAuto(&p.To, v)
 	case ssa.OpAMD64LoweredHasCPUFeature:
-		p := s.Prog(x86.AMOVB)
+		p := s.Prog(x86.AMOVBQZX)
 		p.From.Type = obj.TYPE_MEM
 		gc.AddAux(&p.From, v)
 		p.To.Type = obj.TYPE_REG
src/cmd/compile/internal/ssa/gen/AMD64.rules

@@ -478,7 +478,8 @@
 (GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
 (GetCallerPC ...) -> (LoweredGetCallerPC ...)
 (GetCallerSP ...) -> (LoweredGetCallerSP ...)
-(HasCPUFeature ...) -> (LoweredHasCPUFeature ...)
+
+(HasCPUFeature {s}) -> (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
 (Addr ...) -> (LEAQ ...)
 (LocalAddr {sym} base _) -> (LEAQ {sym} base)
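In effect, the new rule reads the feature flag as a zero-extended word and derives the boolean from a compare against zero, instead of treating the loaded byte itself as the bool. A rough Go model of the check this rule describes, where hasFeature and flag are illustrative names rather than the compiler's or runtime's actual symbols:

package cpufeature

// hasFeature models the lowering above: LoweredHasCPUFeature becomes a
// zero-extending byte load (MOVBQZX), and SETNE (CMPQconst [0] ...) turns
// the widened value into the boolean result.
func hasFeature(flag *uint8) bool {
	v := uint64(*flag) // MOVBQZX: zero-extend the feature byte into a register
	return v != 0      // CMPQ $0 + SETNE
}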
src/cmd/compile/internal/ssa/gen/AMD64Ops.go

@@ -738,7 +738,7 @@ func init() {
 		// It saves all GP registers if necessary, but may clobber others.
 		{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},
 
-		{name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "bool", aux: "Sym", symEffect: "None"},
+		{name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "UInt64", aux: "Sym", symEffect: "None"},
 
 		// There are three of these functions so that they can have three different register inputs.
 		// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
src/cmd/compile/internal/ssa/rewriteAMD64.go

@@ -787,8 +787,7 @@ func rewriteValueAMD64(v *Value) bool {
 	case OpGreater64F:
 		return rewriteValueAMD64_OpGreater64F(v)
 	case OpHasCPUFeature:
-		v.Op = OpAMD64LoweredHasCPUFeature
-		return true
+		return rewriteValueAMD64_OpHasCPUFeature(v)
 	case OpHmul32:
 		v.Op = OpAMD64HMULL
 		return true
@@ -29924,6 +29923,23 @@ func rewriteValueAMD64_OpGreater64F(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueAMD64_OpHasCPUFeature(v *Value) bool {
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (HasCPUFeature {s})
+	// result: (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))
+	for {
+		s := v.Aux
+		v.reset(OpAMD64SETNE)
+		v0 := b.NewValue0(v.Pos, OpAMD64CMPQconst, types.TypeFlags)
+		v0.AuxInt = 0
+		v1 := b.NewValue0(v.Pos, OpAMD64LoweredHasCPUFeature, typ.UInt64)
+		v1.Aux = s
+		v0.AddArg(v1)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueAMD64_OpIsInBounds(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]