mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
[dev.simd] cmd/compile: fix isIntrinsic for methods; fix fp <-> gp moves
Also includes a handy debugging hook for the inliner.

Change-Id: I23d0619506219d21db78c6c801612ff058562142
Reviewed-on: https://go-review.googlesource.com/c/go/+/694118
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent
08ab8e24a3
commit
d5dea86993
3 changed files with 97 additions and 30 deletions
|
|
@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isFPReg reports whether register number r lies in the inclusive range
// x86.REG_X0 through x86.REG_Z31.
// NOTE(review): presumably that range covers the X, Y and Z vector registers,
// which would rely on those constants being numbered contiguously — confirm
// against the x86 register definitions.
func isFPReg(r int16) bool {
|
||||||
|
return x86.REG_X0 <= r && r <= x86.REG_Z31
|
||||||
|
}
|
||||||
|
|
||||||
// loadByType returns the load instruction of the given type.
|
// loadByType returns the load instruction of the given type.
|
||||||
func loadByType(t *types.Type) obj.As {
|
func loadByType(t *types.Type) obj.As {
|
||||||
// Avoid partial register write
|
// Avoid partial register write
|
||||||
|
|
@ -88,17 +92,20 @@ func storeByType(t *types.Type) obj.As {
|
||||||
}
|
}
|
||||||
|
|
||||||
// moveByType returns the reg->reg move instruction of the given type.
|
// moveByType returns the reg->reg move instruction of the given type.
|
||||||
func moveByType(t *types.Type) obj.As {
|
func moveByType(from, to *ssa.Value) obj.As {
|
||||||
if t.IsFloat() {
|
toT := to.Type
|
||||||
|
fromR, toR := from.Reg(), to.Reg()
|
||||||
|
if isFPReg(fromR) && isFPReg(toR) && toT.IsFloat() {
|
||||||
// Moving the whole sse2 register is faster
|
// Moving the whole sse2 register is faster
|
||||||
// than moving just the correct low portion of it.
|
// than moving just the correct low portion of it.
|
||||||
// There is no xmm->xmm move with 1 byte opcode,
|
// There is no xmm->xmm move with 1 byte opcode,
|
||||||
// so use movups, which has 2 byte opcode.
|
// so use movups, which has 2 byte opcode.
|
||||||
return x86.AMOVUPS
|
return x86.AMOVUPS
|
||||||
} else if t.IsSIMD() {
|
}
|
||||||
return simdMov(t.Size())
|
if toT.IsSIMD() {
|
||||||
} else {
|
return simdMov(toT.Size())
|
||||||
switch t.Size() {
|
}
|
||||||
|
switch toT.Size() {
|
||||||
case 1:
|
case 1:
|
||||||
// Avoids partial register write
|
// Avoids partial register write
|
||||||
return x86.AMOVL
|
return x86.AMOVL
|
||||||
|
|
@ -111,8 +118,7 @@ func moveByType(t *types.Type) obj.As {
|
||||||
case 16:
|
case 16:
|
||||||
return x86.AMOVUPS // int128s are in SSE registers
|
return x86.AMOVUPS // int128s are in SSE registers
|
||||||
default:
|
default:
|
||||||
panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
|
panic(fmt.Sprintf("bad int register width %d:%v", toT.Size(), toT))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -648,7 +654,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
// But this requires a way for regalloc to know that SRC might be
|
// But this requires a way for regalloc to know that SRC might be
|
||||||
// clobbered by this instruction.
|
// clobbered by this instruction.
|
||||||
t := v.RegTmp()
|
t := v.RegTmp()
|
||||||
opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
|
opregreg(s, moveByType(v.Args[1], v), t, v.Args[1].Reg())
|
||||||
|
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
p.From.Type = obj.TYPE_REG
|
p.From.Type = obj.TYPE_REG
|
||||||
|
|
@ -820,13 +826,37 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
p.From.Offset = v.AuxInt
|
p.From.Offset = v.AuxInt
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = x
|
p.To.Reg = x
|
||||||
|
|
||||||
case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
|
case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
|
||||||
x := v.Reg()
|
x := v.Reg()
|
||||||
p := s.Prog(v.Op.Asm())
|
a := v.Op.Asm()
|
||||||
|
if x < x86.REG_X0 { // not an FP register
|
||||||
|
if v.AuxInt == 0 && v.Aux == nil {
|
||||||
|
opregreg(s, x86.AXORL, x, x)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
c := v.AuxInt
|
||||||
|
switch v.Type.Size() {
|
||||||
|
case 4:
|
||||||
|
a = x86.AMOVL
|
||||||
|
c = int64(math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
|
||||||
|
case 8:
|
||||||
|
a = x86.AMOVQ
|
||||||
|
default:
|
||||||
|
panic(fmt.Sprintf("unexpected type width for float const into non-float register, %v", v))
|
||||||
|
}
|
||||||
|
p := s.Prog(a)
|
||||||
|
p.From.Type = obj.TYPE_CONST
|
||||||
|
p.From.Offset = c
|
||||||
|
p.To.Type = obj.TYPE_REG
|
||||||
|
p.To.Reg = x
|
||||||
|
} else {
|
||||||
|
p := s.Prog(a)
|
||||||
p.From.Type = obj.TYPE_FCONST
|
p.From.Type = obj.TYPE_FCONST
|
||||||
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
|
p.From.Val = math.Float64frombits(uint64(v.AuxInt))
|
||||||
p.To.Type = obj.TYPE_REG
|
p.To.Type = obj.TYPE_REG
|
||||||
p.To.Reg = x
|
p.To.Reg = x
|
||||||
|
}
|
||||||
case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
|
case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
|
||||||
ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
|
ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
|
||||||
ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
|
ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
|
||||||
|
|
@ -1134,7 +1164,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
||||||
y = simdOrMaskReg(v)
|
y = simdOrMaskReg(v)
|
||||||
}
|
}
|
||||||
if x != y {
|
if x != y {
|
||||||
opregreg(s, moveByType(v.Type), y, x)
|
opregreg(s, moveByType(v.Args[0], v), y, x)
|
||||||
}
|
}
|
||||||
case ssa.OpLoadReg:
|
case ssa.OpLoadReg:
|
||||||
if v.Type.IsFlags() {
|
if v.Type.IsFlags() {
|
||||||
|
|
|
||||||
|
|
@ -202,6 +202,7 @@ func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose boo
|
||||||
// be very liberal here, if the closure is only called once, the budget is large
|
// be very liberal here, if the closure is only called once, the budget is large
|
||||||
budget = max(budget, inlineClosureCalledOnceCost)
|
budget = max(budget, inlineClosureCalledOnceCost)
|
||||||
}
|
}
|
||||||
|
|
||||||
return budget
|
return budget
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -263,6 +264,7 @@ func CanInline(fn *ir.Func, profile *pgoir.Profile) {
|
||||||
|
|
||||||
visitor := hairyVisitor{
|
visitor := hairyVisitor{
|
||||||
curFunc: fn,
|
curFunc: fn,
|
||||||
|
debug: isDebugFn(fn),
|
||||||
isBigFunc: IsBigFunc(fn),
|
isBigFunc: IsBigFunc(fn),
|
||||||
budget: budget,
|
budget: budget,
|
||||||
maxBudget: budget,
|
maxBudget: budget,
|
||||||
|
|
@ -407,6 +409,7 @@ type hairyVisitor struct {
|
||||||
// This is needed to access the current caller in the doNode function.
|
// This is needed to access the current caller in the doNode function.
|
||||||
curFunc *ir.Func
|
curFunc *ir.Func
|
||||||
isBigFunc bool
|
isBigFunc bool
|
||||||
|
debug bool
|
||||||
budget int32
|
budget int32
|
||||||
maxBudget int32
|
maxBudget int32
|
||||||
reason string
|
reason string
|
||||||
|
|
@ -416,6 +419,16 @@ type hairyVisitor struct {
|
||||||
profile *pgoir.Profile
|
profile *pgoir.Profile
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isDebugFn reports whether debug tracing should be enabled for fn.
// As written it always returns false. The commented-out body is a template
// for selecting one function by package path and symbol name; when it
// matches, it prints the match and returns true.
func isDebugFn(fn *ir.Func) bool {
|
||||||
|
// if n := fn.Nname; n != nil && n.Sym().Pkg.Path == "0" {
|
||||||
|
// if n.Sym().Name == "BroadcastInt64x4" {
|
||||||
|
// fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
|
||||||
|
// return true
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
|
func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
|
||||||
v.do = v.doNode // cache closure
|
v.do = v.doNode // cache closure
|
||||||
if ir.DoChildren(fn, v.do) {
|
if ir.DoChildren(fn, v.do) {
|
||||||
|
|
@ -434,6 +447,9 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
|
||||||
if n == nil {
|
if n == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
if v.debug {
|
||||||
|
fmt.Printf("%v: doNode %v budget is %d\n", ir.Line(n), n.Op(), v.budget)
|
||||||
|
}
|
||||||
opSwitch:
|
opSwitch:
|
||||||
switch n.Op() {
|
switch n.Op() {
|
||||||
// Call is okay if inlinable and we have the budget for the body.
|
// Call is okay if inlinable and we have the budget for the body.
|
||||||
|
|
@ -551,12 +567,19 @@ opSwitch:
|
||||||
}
|
}
|
||||||
|
|
||||||
if cheap {
|
if cheap {
|
||||||
|
if v.debug {
|
||||||
|
if ir.IsIntrinsicCall(n) {
|
||||||
|
fmt.Printf("%v: cheap call is also intrinsic, %v\n", ir.Line(n), n)
|
||||||
|
}
|
||||||
|
}
|
||||||
break // treat like any other node, that is, cost of 1
|
break // treat like any other node, that is, cost of 1
|
||||||
}
|
}
|
||||||
|
|
||||||
if ir.IsIntrinsicCall(n) {
|
if ir.IsIntrinsicCall(n) {
|
||||||
// Treat like any other node.
|
if v.debug {
|
||||||
break
|
fmt.Printf("%v: intrinsic call, %v\n", ir.Line(n), n)
|
||||||
|
}
|
||||||
|
break // Treat like any other node.
|
||||||
}
|
}
|
||||||
|
|
||||||
if callee := inlCallee(v.curFunc, n.Fun, v.profile, false); callee != nil && typecheck.HaveInlineBody(callee) {
|
if callee := inlCallee(v.curFunc, n.Fun, v.profile, false); callee != nil && typecheck.HaveInlineBody(callee) {
|
||||||
|
|
@ -583,6 +606,10 @@ opSwitch:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if v.debug {
|
||||||
|
fmt.Printf("%v: costly OCALLFUNC %v\n", ir.Line(n), n)
|
||||||
|
}
|
||||||
|
|
||||||
// Call cost for non-leaf inlining.
|
// Call cost for non-leaf inlining.
|
||||||
v.budget -= extraCost
|
v.budget -= extraCost
|
||||||
|
|
||||||
|
|
@ -592,6 +619,9 @@ opSwitch:
|
||||||
// Things that are too hairy, irrespective of the budget
|
// Things that are too hairy, irrespective of the budget
|
||||||
case ir.OCALL, ir.OCALLINTER:
|
case ir.OCALL, ir.OCALLINTER:
|
||||||
// Call cost for non-leaf inlining.
|
// Call cost for non-leaf inlining.
|
||||||
|
if v.debug {
|
||||||
|
fmt.Printf("%v: costly OCALL %v\n", ir.Line(n), n)
|
||||||
|
}
|
||||||
v.budget -= v.extraCallCost
|
v.budget -= v.extraCallCost
|
||||||
|
|
||||||
case ir.OPANIC:
|
case ir.OPANIC:
|
||||||
|
|
@ -743,7 +773,7 @@ opSwitch:
|
||||||
v.budget--
|
v.budget--
|
||||||
|
|
||||||
// When debugging, don't stop early, to get full cost of inlining this function
|
// When debugging, don't stop early, to get full cost of inlining this function
|
||||||
if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
|
if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() && !v.debug {
|
||||||
v.reason = "too expensive"
|
v.reason = "too expensive"
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1913,6 +1913,13 @@ func IsIntrinsicCall(n *ir.CallExpr) bool {
|
||||||
}
|
}
|
||||||
name, ok := n.Fun.(*ir.Name)
|
name, ok := n.Fun.(*ir.Name)
|
||||||
if !ok {
|
if !ok {
|
||||||
|
if n.Fun.Op() == ir.OMETHEXPR {
|
||||||
|
if meth := ir.MethodExprName(n.Fun); meth != nil {
|
||||||
|
if fn := meth.Func; fn != nil {
|
||||||
|
return IsIntrinsicSym(fn.Sym())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return IsIntrinsicSym(name.Sym())
|
return IsIntrinsicSym(name.Sym())
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue