mirror of https://github.com/golang/go.git
[dev.simd] cmd/compile: fix isIntrinsic for methods; fix fp <-> gp moves
Also includes a handy debugging hook for the inliner.

Change-Id: I23d0619506219d21db78c6c801612ff058562142
Reviewed-on: https://go-review.googlesource.com/c/go/+/694118
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Junyang Shao <shaojunyang@google.com>
This commit is contained in:
parent 08ab8e24a3
commit d5dea86993

3 changed files with 97 additions and 30 deletions
src/cmd/compile/internal/amd64/ssa.go

@@ -43,6 +43,10 @@ func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
 	}
 }
 
+func isFPReg(r int16) bool {
+	return x86.REG_X0 <= r && r <= x86.REG_Z31
+}
+
 // loadByType returns the load instruction of the given type.
 func loadByType(t *types.Type) obj.As {
 	// Avoid partial register write
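The new isFPReg helper classifies a register purely by its number: everything from REG_X0 through REG_Z31 counts as a floating-point/vector register. A minimal standalone sketch of the same range check; the constants below are hypothetical stand-ins for cmd/internal/obj/x86, whose X, Y, and Z register names are assumed to be numbered contiguously (that contiguity is what the single comparison relies on):

	package main

	import "fmt"

	// Stand-in register numbers (invented values; only the contiguous
	// layout of X0..Z31 matters for the range check).
	const (
		regAX  int16 = 1          // a general-purpose register
		regX0  int16 = 100        // first vector register
		regZ31 int16 = regX0 + 95 // last vector register (32 X + 32 Y + 32 Z)
	)

	func isFPReg(r int16) bool { return regX0 <= r && r <= regZ31 }

	func main() {
		fmt.Println(isFPReg(regAX)) // false: integer register
		fmt.Println(isFPReg(regX0)) // true: vector register
	}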
@@ -88,31 +92,33 @@ func storeByType(t *types.Type) obj.As {
 }
 
 // moveByType returns the reg->reg move instruction of the given type.
-func moveByType(t *types.Type) obj.As {
-	if t.IsFloat() {
+func moveByType(from, to *ssa.Value) obj.As {
+	toT := to.Type
+	fromR, toR := from.Reg(), to.Reg()
+	if isFPReg(fromR) && isFPReg(toR) && toT.IsFloat() {
 		// Moving the whole sse2 register is faster
 		// than moving just the correct low portion of it.
 		// There is no xmm->xmm move with 1 byte opcode,
 		// so use movups, which has 2 byte opcode.
 		return x86.AMOVUPS
-	} else if t.IsSIMD() {
-		return simdMov(t.Size())
-	} else {
-		switch t.Size() {
-		case 1:
-			// Avoids partial register write
-			return x86.AMOVL
-		case 2:
-			return x86.AMOVL
-		case 4:
-			return x86.AMOVL
-		case 8:
-			return x86.AMOVQ
-		case 16:
-			return x86.AMOVUPS // int128s are in SSE registers
-		default:
-			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
-		}
-	}
+	}
+	if toT.IsSIMD() {
+		return simdMov(toT.Size())
+	}
+	switch toT.Size() {
+	case 1:
+		// Avoids partial register write
+		return x86.AMOVL
+	case 2:
+		return x86.AMOVL
+	case 4:
+		return x86.AMOVL
+	case 8:
+		return x86.AMOVQ
+	case 16:
+		return x86.AMOVUPS // int128s are in SSE registers
+	default:
+		panic(fmt.Sprintf("bad int register width %d:%v", toT.Size(), toT))
+	}
 }
 
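The old moveByType chose an opcode from the value's type alone, so a float-typed value could be handed an xmm-only move even when one side lived in a general-purpose register. Taking the source and destination *ssa.Value lets it inspect both registers: MOVUPS is now reserved for moves where both ends are vector registers, and everything else falls through to the size-based integer moves, which on amd64 also encode the fp <-> gp transfer forms. A condensed, self-contained sketch of that selection order (moveKind is a hypothetical name, and the SIMD branch of the real function is omitted):

	package main

	import "fmt"

	// moveKind mirrors the new selection order: register classes first,
	// then operand size in bytes.
	func moveKind(fromFP, toFP, isFloat bool, size int64) string {
		if fromFP && toFP && isFloat {
			return "MOVUPS" // whole-register xmm->xmm move, shortest encoding
		}
		switch size {
		case 1, 2, 4:
			return "MOVL" // a full 32-bit write avoids partial-register stalls
		case 8:
			return "MOVQ" // MOVQ also encodes the fp<->gp transfer forms
		case 16:
			return "MOVUPS" // int128s are kept in SSE registers
		}
		panic("unexpected register width")
	}

	func main() {
		fmt.Println(moveKind(true, true, true, 8))  // MOVUPS: xmm -> xmm
		fmt.Println(moveKind(true, false, true, 8)) // MOVQ: xmm -> gp, the fixed case
	}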
@@ -648,7 +654,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		// But this requires a way for regalloc to know that SRC might be
 		// clobbered by this instruction.
 		t := v.RegTmp()
-		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
+		opregreg(s, moveByType(v.Args[1], v), t, v.Args[1].Reg())
 
 		p := s.Prog(v.Op.Asm())
 		p.From.Type = obj.TYPE_REG
@@ -820,13 +826,37 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.From.Offset = v.AuxInt
 		p.To.Type = obj.TYPE_REG
 		p.To.Reg = x
 
 	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
 		x := v.Reg()
-		p := s.Prog(v.Op.Asm())
-		p.From.Type = obj.TYPE_FCONST
-		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
-		p.To.Type = obj.TYPE_REG
-		p.To.Reg = x
+		a := v.Op.Asm()
+		if x < x86.REG_X0 { // not an FP register
+			if v.AuxInt == 0 && v.Aux == nil {
+				opregreg(s, x86.AXORL, x, x)
+				break
+			}
+			c := v.AuxInt
+			switch v.Type.Size() {
+			case 4:
+				a = x86.AMOVL
+				c = int64(math.Float32bits(float32(math.Float64frombits(uint64(v.AuxInt)))))
+			case 8:
+				a = x86.AMOVQ
+			default:
+				panic(fmt.Sprintf("unexpected type width for float const into non-float register, %v", v))
+			}
+			p := s.Prog(a)
+			p.From.Type = obj.TYPE_CONST
+			p.From.Offset = c
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = x
+		} else {
+			p := s.Prog(a)
+			p.From.Type = obj.TYPE_FCONST
+			p.From.Val = math.Float64frombits(uint64(v.AuxInt))
+			p.To.Type = obj.TYPE_REG
+			p.To.Reg = x
+		}
 	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
 		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
 		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
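A float constant can now be asked to materialize in a general-purpose register, in which case its bit pattern is loaded with a plain integer move (or the register is cheaply zeroed with XORL). Since AuxInt carries both float32 and float64 constants as a float64 bit pattern, the 4-byte case has to decode, narrow, and re-encode. A runnable illustration of that conversion, using only the standard library:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		// The backend stores float constants of either width as a
		// float64 bit pattern in AuxInt.
		auxInt := int64(math.Float64bits(1.5))

		// float64 constant into a GP register: MOVQ of the pattern unchanged.
		fmt.Printf("MOVQ $%#x\n", uint64(auxInt))

		// float32 constant: decode the float64, narrow to float32, then take
		// the 32-bit pattern; the same conversion as the case 4 branch above.
		c := int64(math.Float32bits(float32(math.Float64frombits(uint64(auxInt)))))
		fmt.Printf("MOVL $%#x\n", uint32(c))
	}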
@@ -1134,7 +1164,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 			y = simdOrMaskReg(v)
 		}
 		if x != y {
-			opregreg(s, moveByType(v.Type), y, x)
+			opregreg(s, moveByType(v.Args[0], v), y, x)
 		}
 	case ssa.OpLoadReg:
 		if v.Type.IsFlags() {
src/cmd/compile/internal/inline/inl.go

@@ -202,6 +202,7 @@ func inlineBudget(fn *ir.Func, profile *pgoir.Profile, relaxed bool, verbose bool) int32 {
 		// be very liberal here, if the closure is only called once, the budget is large
 		budget = max(budget, inlineClosureCalledOnceCost)
 	}
 
 	return budget
 }
@@ -263,6 +264,7 @@ func CanInline(fn *ir.Func, profile *pgoir.Profile) {
 
 	visitor := hairyVisitor{
 		curFunc:   fn,
+		debug:     isDebugFn(fn),
 		isBigFunc: IsBigFunc(fn),
 		budget:    budget,
 		maxBudget: budget,
@@ -407,6 +409,7 @@ type hairyVisitor struct {
 	// This is needed to access the current caller in the doNode function.
 	curFunc   *ir.Func
 	isBigFunc bool
+	debug     bool
 	budget    int32
 	maxBudget int32
 	reason    string
@@ -416,6 +419,16 @@ type hairyVisitor struct {
 	profile *pgoir.Profile
 }
 
+func isDebugFn(fn *ir.Func) bool {
+	// if n := fn.Nname; n != nil && n.Sym().Pkg.Path == "0" {
+	// 	if n.Sym().Name == "BroadcastInt64x4" {
+	// 		fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
+	// 		return true
+	// 	}
+	// }
+	return false
+}
+
 func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
 	v.do = v.doNode // cache closure
 	if ir.DoChildren(fn, v.do) {
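The hook ships disabled; turning it on means uncommenting the body and editing the package path and function name to match the target (the path "0" in the commented-out code appears to be a placeholder). A sketch of the enabled form, with example identifiers; this only compiles inside cmd/compile, since it uses the internal ir package:

	// Enabled variant (the "simd" path and function name are examples only).
	func isDebugFn(fn *ir.Func) bool {
		if n := fn.Nname; n != nil && n.Sym().Pkg.Path == "simd" {
			if n.Sym().Name == "BroadcastInt64x4" {
				fmt.Printf("isDebugFn '%s' DOT '%s'\n", n.Sym().Pkg.Path, n.Sym().Name)
				return true
			}
		}
		return false
	}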
@@ -434,6 +447,9 @@ func (v *hairyVisitor) doNode(n ir.Node) bool {
 	if n == nil {
 		return false
 	}
+	if v.debug {
+		fmt.Printf("%v: doNode %v budget is %d\n", ir.Line(n), n.Op(), v.budget)
+	}
 opSwitch:
 	switch n.Op() {
 	// Call is okay if inlinable and we have the budget for the body.
@@ -551,12 +567,19 @@ opSwitch:
 			}
 
 			if cheap {
+				if v.debug {
+					if ir.IsIntrinsicCall(n) {
+						fmt.Printf("%v: cheap call is also intrinsic, %v\n", ir.Line(n), n)
+					}
+				}
 				break // treat like any other node, that is, cost of 1
 			}
 
 			if ir.IsIntrinsicCall(n) {
-				// Treat like any other node.
-				break
+				if v.debug {
+					fmt.Printf("%v: intrinsic call, %v\n", ir.Line(n), n)
+				}
+				break // Treat like any other node.
 			}
 
 			if callee := inlCallee(v.curFunc, n.Fun, v.profile, false); callee != nil && typecheck.HaveInlineBody(callee) {
@@ -583,6 +606,10 @@ opSwitch:
 			}
 		}
 
+		if v.debug {
+			fmt.Printf("%v: costly OCALLFUNC %v\n", ir.Line(n), n)
+		}
+
 		// Call cost for non-leaf inlining.
 		v.budget -= extraCost
 
@@ -592,6 +619,9 @@ opSwitch:
 	// Things that are too hairy, irrespective of the budget
 	case ir.OCALL, ir.OCALLINTER:
 		// Call cost for non-leaf inlining.
+		if v.debug {
+			fmt.Printf("%v: costly OCALL %v\n", ir.Line(n), n)
+		}
 		v.budget -= v.extraCallCost
 
 	case ir.OPANIC:
@@ -743,7 +773,7 @@ opSwitch:
 	v.budget--
 
 	// When debugging, don't stop early, to get full cost of inlining this function
-	if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
+	if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() && !v.debug {
 		v.reason = "too expensive"
 		return true
 	}
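With !v.debug appended to the bail-out condition, a hooked function keeps being costed after its budget goes negative, so the trace reports the full inlining cost rather than stopping at the first overrun. Combined with the prints added above, the output looks roughly like this (file names, ops, and budget values are invented for illustration):

	simd/ops.go:41: doNode OCALLFUNC budget is 3
	simd/ops.go:41: intrinsic call, v.BroadcastInt64x4()
	simd/ops.go:42: doNode ORETURN budget is 2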
src/cmd/compile/internal/ssagen/intrinsics.go

@@ -1913,6 +1913,13 @@ func IsIntrinsicCall(n *ir.CallExpr) bool {
 	}
 	name, ok := n.Fun.(*ir.Name)
 	if !ok {
+		if n.Fun.Op() == ir.OMETHEXPR {
+			if meth := ir.MethodExprName(n.Fun); meth != nil {
+				if fn := meth.Func; fn != nil {
+					return IsIntrinsicSym(fn.Sym())
+				}
+			}
+		}
 		return false
 	}
 	return IsIntrinsicSym(name.Sym())
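Previously IsIntrinsicCall required n.Fun to be a plain *ir.Name and returned false for anything else, which is exactly the shape a method call has, so intrinsic SIMD methods were costed and compiled as ordinary calls. The new branch resolves the method expression down to a symbol instead. Schematically (the SIMD identifiers are examples):

	// simd.LoadInt64x4(p)   n.Fun is an *ir.Name       -> recognized before this fix
	// v.BroadcastInt64x4()  n.Fun is an OMETHEXPR node -> previously fell to "return false"
	//
	// The added branch maps OMETHEXPR -> method *ir.Name -> *ir.Func -> Sym,
	// then defers to IsIntrinsicSym exactly as the plain-name path does.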