diff --git a/src/cmd/compile/internal/deadlocals/deadlocals.go b/src/cmd/compile/internal/deadlocals/deadlocals.go index 238450416a9..55ad0387a4d 100644 --- a/src/cmd/compile/internal/deadlocals/deadlocals.go +++ b/src/cmd/compile/internal/deadlocals/deadlocals.go @@ -44,6 +44,11 @@ func Funcs(fns []*ir.Func) { *as.lhs = ir.BlankNode *as.rhs = zero } + if len(assigns) > 0 { + // k.Defn might be pointing at one of the + // assignments we're overwriting. + k.Defn = nil + } } } } diff --git a/src/cmd/compile/internal/escape/leaks.go b/src/cmd/compile/internal/escape/leaks.go index 942f87d2a22..176bccd8470 100644 --- a/src/cmd/compile/internal/escape/leaks.go +++ b/src/cmd/compile/internal/escape/leaks.go @@ -124,3 +124,21 @@ func parseLeaks(s string) leaks { copy(l[:], s[4:]) return l } + +func ParseLeaks(s string) leaks { + return parseLeaks(s) +} + +// Any reports whether the value flows anywhere at all. +func (l leaks) Any() bool { + // TODO: do mutator/callee matter? + if l.Heap() >= 0 || l.Mutator() >= 0 || l.Callee() >= 0 { + return true + } + for i := range numEscResults { + if l.Result(i) >= 0 { + return true + } + } + return false +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 42e2afaee4f..ef6a5d6017c 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -22,6 +22,7 @@ import ( "cmd/compile/internal/pkginit" "cmd/compile/internal/reflectdata" "cmd/compile/internal/rttype" + "cmd/compile/internal/slice" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/staticinit" @@ -266,6 +267,8 @@ func Main(archInit func(*ssagen.ArchInfo)) { base.Timer.Start("fe", "escapes") escape.Funcs(typecheck.Target.Funcs) + slice.Funcs(typecheck.Target.Funcs) + loopvar.LogTransformations(transformed) // Collect information for go:nowritebarrierrec diff --git a/src/cmd/compile/internal/ir/expr.go b/src/cmd/compile/internal/ir/expr.go index 7a75ff40f2d..dd1b94aa0da 100644 --- a/src/cmd/compile/internal/ir/expr.go +++ b/src/cmd/compile/internal/ir/expr.go @@ -192,6 +192,7 @@ type CallExpr struct { IsDDD bool GoDefer bool // whether this call is part of a go or defer statement NoInline bool // whether this call must not be inlined + UseBuf bool // use stack buffer for backing store (OAPPEND only) } func NewCallExpr(pos src.XPos, op Op, fun Node, args []Node) *CallExpr { @@ -1269,3 +1270,28 @@ func MethodExprFunc(n Node) *types.Field { base.Fatalf("unexpected node: %v (%v)", n, n.Op()) panic("unreachable") } + +// A MoveToHeapExpr takes a slice as input and moves it to the +// heap (by copying the backing store if it is not already +// on the heap). +type MoveToHeapExpr struct { + miniExpr + Slice Node + // An expression that evaluates to a *runtime._type + // that represents the slice element type. + RType Node + // If PreserveCapacity is true, the capacity of + // the resulting slice, and all of the elements in + // [len:cap], must be preserved. + // If PreserveCapacity is false, the resulting + // slice may have any capacity >= len, with any + // elements in the resulting [len:cap] range zeroed. 
+ PreserveCapacity bool +} + +func NewMoveToHeapExpr(pos src.XPos, slice Node) *MoveToHeapExpr { + n := &MoveToHeapExpr{Slice: slice} + n.pos = pos + n.op = OMOVE2HEAP + return n +} diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go index 01f1c0c5022..63f1b1c931c 100644 --- a/src/cmd/compile/internal/ir/name.go +++ b/src/cmd/compile/internal/ir/name.go @@ -43,7 +43,7 @@ type Name struct { Func *Func // TODO(austin): nil for I.M Offset_ int64 val constant.Value - Opt any // for use by escape analysis + Opt any // for use by escape or slice analysis Embed *[]Embed // list of embedded files, for ONAME var // For a local variable (not param) or extern, the initializing assignment (OAS or OAS2). diff --git a/src/cmd/compile/internal/ir/node.go b/src/cmd/compile/internal/ir/node.go index 8c61bb6ed5a..f26f61cb18a 100644 --- a/src/cmd/compile/internal/ir/node.go +++ b/src/cmd/compile/internal/ir/node.go @@ -293,6 +293,7 @@ const ( OLINKSYMOFFSET // offset within a name OJUMPTABLE // A jump table structure for implementing dense expression switches OINTERFACESWITCH // A type switch with interface cases + OMOVE2HEAP // Promote a stack-backed slice to heap // opcodes for generics ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter) diff --git a/src/cmd/compile/internal/ir/node_gen.go b/src/cmd/compile/internal/ir/node_gen.go index 2221045c93d..4298b3a43d7 100644 --- a/src/cmd/compile/internal/ir/node_gen.go +++ b/src/cmd/compile/internal/ir/node_gen.go @@ -1175,6 +1175,34 @@ func (n *MakeExpr) editChildrenWithHidden(edit func(Node) Node) { } } +func (n *MoveToHeapExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } +func (n *MoveToHeapExpr) copy() Node { + c := *n + c.init = copyNodes(c.init) + return &c +} +func (n *MoveToHeapExpr) doChildren(do func(Node) bool) bool { + if doNodes(n.init, do) { + return true + } + if n.Slice != nil && do(n.Slice) { + return true + } + return false +} +func (n *MoveToHeapExpr) doChildrenWithHidden(do func(Node) bool) bool { + return n.doChildren(do) +} +func (n *MoveToHeapExpr) editChildren(edit func(Node) Node) { + editNodes(n.init, edit) + if n.Slice != nil { + n.Slice = edit(n.Slice).(Node) + } +} +func (n *MoveToHeapExpr) editChildrenWithHidden(edit func(Node) Node) { + n.editChildren(edit) +} + func (n *Name) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } func (n *NilExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } diff --git a/src/cmd/compile/internal/ir/op_string.go b/src/cmd/compile/internal/ir/op_string.go index 7494beee4c5..f042ad84a40 100644 --- a/src/cmd/compile/internal/ir/op_string.go +++ b/src/cmd/compile/internal/ir/op_string.go @@ -151,18 +151,19 @@ func _() { _ = x[OLINKSYMOFFSET-140] _ = x[OJUMPTABLE-141] _ = x[OINTERFACESWITCH-142] - _ = x[ODYNAMICDOTTYPE-143] - _ = x[ODYNAMICDOTTYPE2-144] - _ = x[ODYNAMICTYPE-145] - _ = x[OTAILCALL-146] - _ = x[OGETG-147] - _ = x[OGETCALLERSP-148] - _ = x[OEND-149] + _ = x[OMOVE2HEAP-143] + _ = x[ODYNAMICDOTTYPE-144] + _ = x[ODYNAMICDOTTYPE2-145] + _ = x[ODYNAMICTYPE-146] + _ = x[OTAILCALL-147] + _ = x[OGETG-148] + _ = x[OGETCALLERSP-149] + _ = x[OEND-150] } -const _Op_name = 
"XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" +const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHMOVE2HEAPDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" -var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935} +var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 878, 892, 907, 918, 926, 930, 941, 944} func (i Op) String() string { if i >= Op(len(_Op_index)-1) { diff --git a/src/cmd/compile/internal/ir/stmt.go 
b/src/cmd/compile/internal/ir/stmt.go index 0801ecdd9e8..affa5f4551e 100644 --- a/src/cmd/compile/internal/ir/stmt.go +++ b/src/cmd/compile/internal/ir/stmt.go @@ -42,6 +42,7 @@ func (*Decl) isStmt() {} type Stmt interface { Node isStmt() + PtrInit() *Nodes } // A miniStmt is a miniNode with extra fields common to statements. diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go index f8eb4578809..828c3b553a6 100644 --- a/src/cmd/compile/internal/ir/symtab.go +++ b/src/cmd/compile/internal/ir/symtab.go @@ -29,6 +29,11 @@ type symsStruct struct { GCWriteBarrier [8]*obj.LSym Goschedguarded *obj.LSym Growslice *obj.LSym + GrowsliceBuf *obj.LSym + MoveSlice *obj.LSym + MoveSliceNoScan *obj.LSym + MoveSliceNoCap *obj.LSym + MoveSliceNoCapNoScan *obj.LSym InterfaceSwitch *obj.LSym MallocGC *obj.LSym MallocGCSmallNoScan [27]*obj.LSym diff --git a/src/cmd/compile/internal/slice/slice.go b/src/cmd/compile/internal/slice/slice.go new file mode 100644 index 00000000000..7a32e7adbd2 --- /dev/null +++ b/src/cmd/compile/internal/slice/slice.go @@ -0,0 +1,455 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package slice + +// This file implements a stack-allocation optimization +// for the backing store of slices. +// +// Consider the code: +// +// var s []int +// for i := range ... { +// s = append(s, i) +// } +// return s +// +// Some of the append operations will need to do an allocation +// by calling growslice. This will happen on the 1st, 2nd, 4th, +// 8th, etc. append calls. The allocations done by all but the +// last growslice call will then immediately be garbage. +// +// We'd like to avoid doing some of those intermediate +// allocations if possible. +// +// If we can determine that the "return s" statement is the +// *only* way that the backing store for s escapes, then we +// can rewrite the code to something like: +// +// var s []int +// for i := range N { +// s = append(s, i) +// } +// s = move2heap(s) +// return s +// +// Using the move2heap runtime function, which does: +// +// move2heap(s): +// If s is not backed by a stackframe-allocated +// backing store, return s. Otherwise, copy s +// to the heap and return the copy. +// +// Now we can treat the backing store of s allocated at the +// append site as not escaping. Previous stack allocation +// optimizations now apply, which can use a fixed-size +// stack-allocated backing store for s when appending. +// (See ../ssagen/ssa.go:(*state).append) +// +// It is tricky to do this optimization safely. To describe +// our analysis, we first define what an "exclusive" slice +// variable is. +// +// A slice variable (a variable of slice type) is called +// "exclusive" if, when it has a reference to a +// stackframe-allocated backing store, it is the only +// variable with such a reference. +// +// In other words, a slice variable is exclusive if +// any of the following holds: +// 1) It points to a heap-allocated backing store +// 2) It points to a stack-allocated backing store +// for any parent frame. +// 3) It is the only variable that references its +// backing store. +// 4) It is nil. +// +// The nice thing about exclusive slice variables is that +// it is always safe to do +// s = move2heap(s) +// whenever s is an exclusive slice variable. 
Because no +// one else has a reference to the backing store, no one +// else can tell that we moved the backing store from one +// location to another. +// +// Note that exclusiveness is a dynamic property. A slice +// variable may be exclusive during some parts of execution +// and not exclusive during others. +// +// The following operations set or preserve the exclusivity +// of a slice variable s: +// s = nil +// s = append(s, ...) +// s = s[i:j] +// ... = s[i] +// s[i] = ... +// f(s) where f does not escape its argument +// Other operations destroy exclusivity. A non-exhaustive list includes: +// x = s +// *p = s +// f(s) where f escapes its argument +// return s +// To err on the safe side, we whitelist exclusivity-preserving +// operations and we assume that any other operations that mention s +// destroy its exclusivity. +// +// Our strategy is to move the backing store of s to the heap before +// any exclusive->nonexclusive transition. That way, s will only ever +// have a reference to a stack backing store while it is exclusive. +// +// move2heap for a variable s is implemented with: +// if s points to within the stack frame { +// s2 := make([]T, s.len, s.cap) +// copy(s2[:s.cap], s[:s.cap]) +// s = s2 +// } +// Note that in general we need to copy all of s[:cap(s)] elements when +// moving to the heap. As an optimization, we keep track of slice variables +// whose capacity, and the elements in s[len(s):cap(s)], are never accessed. +// For those slice variables, we can allocate to the next size class above +// the length, which saves memory and copying cost. + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/escape" + "cmd/compile/internal/ir" + "cmd/compile/internal/reflectdata" +) + +func Funcs(all []*ir.Func) { + if base.Flag.N != 0 { + return + } + for _, fn := range all { + analyze(fn) + } +} + +func analyze(fn *ir.Func) { + type sliceInfo struct { + // Slice variable. + s *ir.Name + + // Count of uses that this pass understands. + okUses int32 + // Count of all uses found. + allUses int32 + + // A place where the slice variable transitions from + // exclusive to nonexclusive. + // We could keep track of more than one, but one is enough for now. + // Currently, this can be either a return statement or + // an assignment. + // TODO: other possible transitions? + transition ir.Stmt + + // Each s = append(s, ...) instance we found. + appends []*ir.CallExpr + + // Weight of the number of s = append(s, ...) instances we found. + // The optimizations we do are only really useful if the weight is at + // least 2. (Note: appends in loops have weight >= 2.) + appendWeight int + + // Whether we ever do cap(s), or other operations that use cap(s) + // (possibly implicitly), like s[i:j]. + capUsed bool + } + + // Every variable (*ir.Name) that we are tracking will have + // a non-nil *sliceInfo in its Opt field. + haveLocalSlice := false + maxStackSize := int64(base.Debug.VariableMakeThreshold) + var namedRets []*ir.Name + for _, s := range fn.Dcl { + if !s.Type().IsSlice() { + continue + } + if s.Type().Elem().Size() > maxStackSize { + continue + } + if !base.VariableMakeHash.MatchPos(s.Pos(), nil) { + continue + } + s.Opt = &sliceInfo{s: s} // start tracking s + haveLocalSlice = true + if s.Class == ir.PPARAMOUT { + namedRets = append(namedRets, s) + } + } + if !haveLocalSlice { + return + } + + // Keep track of loop depth while walking.
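+ // Appends inside a loop get extra weight (see appendWeight), so a single
+ // textual append in a loop still reaches weight 2 and can qualify.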
+ loopDepth := 0 + + // tracking returns the info for the slice variable if n is a slice + // variable that we're still considering, or nil otherwise. + tracking := func(n ir.Node) *sliceInfo { + if n == nil || n.Op() != ir.ONAME { + return nil + } + s := n.(*ir.Name) + if s.Opt == nil { + return nil + } + return s.Opt.(*sliceInfo) + } + + // addTransition(n, loc) records that s experiences an exclusive->nonexclusive + // transition somewhere within loc. + addTransition := func(i *sliceInfo, loc ir.Stmt) { + if i.transition != nil { + // We only keep track of a single exclusive->nonexclusive transition + // for a slice variable. If we find more than one, give up. + // (More than one transition location would be fine, but we would + // start to get worried about introducing too much additional code.) + i.s.Opt = nil + return + } + i.transition = loc + } + + // Examine an x = y assignment that occurs somewhere within statement stmt. + assign := func(x, y ir.Node, stmt ir.Stmt) { + if i := tracking(x); i != nil { + // s = y. Check for understood patterns for y. + if y == nil || y.Op() == ir.ONIL { + // s = nil is ok. + i.okUses++ + } else if y.Op() == ir.OSLICELIT { + // s = []{...} is ok. + // Note: this reveals capacity. Should it? + i.okUses++ + i.capUsed = true + } else if y.Op() == ir.OSLICE { + y := y.(*ir.SliceExpr) + if y.X == i.s { + // s = s[...:...] is ok + i.okUses += 2 + i.capUsed = true + } + } else if y.Op() == ir.OAPPEND { + y := y.(*ir.CallExpr) + if y.Args[0] == i.s { + // s = append(s, ...) is ok + i.okUses += 2 + i.appends = append(i.appends, y) + i.appendWeight += 1 + loopDepth + } + // TODO: s = append(nil, ...)? + } + // Note that technically s = make([]T, ...) preserves exclusivity, but + // we don't track that because we assume users who wrote that know + // better than the compiler does. + + // TODO: figure out how to handle s = fn(..., s, ...) + // It would be nice to maintain exclusivity of s in this situation. + // But unfortunately, fn can return one of its other arguments, which + // may be a slice with a stack-allocated backing store other than s. + // (which may have preexisting references to its backing store). + // + // Maybe we could do it if s is the only argument? + } + + if i := tracking(y); i != nil { + // ... = s + // Treat this as an exclusive->nonexclusive transition. + i.okUses++ + addTransition(i, stmt) + } + } + + var do func(ir.Node) bool + do = func(n ir.Node) bool { + if n == nil { + return false + } + switch n.Op() { + case ir.ONAME: + if i := tracking(n); i != nil { + // A use of a slice variable. Count it. + i.allUses++ + } + case ir.ODCL: + n := n.(*ir.Decl) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OINDEX: + n := n.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // s[i] is ok. + i.okUses++ + } + case ir.OLEN: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // len(s) is ok + i.okUses++ + } + case ir.OCAP: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // cap(s) is ok + i.okUses++ + i.capUsed = true + } + case ir.OADDR: + n := n.(*ir.AddrExpr) + if n.X.Op() == ir.OINDEX { + n := n.X.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // &s[i] is definitely a nonexclusive transition. + // (We need this case because s[i] is ok, but &s[i] is not.) 
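+ // Once &s[i] exists, s is no longer the only reference to its backing
+ // store, and that pointer would still refer to the old stack copy if we
+ // later moved s, so stop tracking s entirely.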
+ i.s.Opt = nil + } + } + case ir.ORETURN: + n := n.(*ir.ReturnStmt) + for _, x := range n.Results { + if i := tracking(x); i != nil { + i.okUses++ + // We go exclusive->nonexclusive here + addTransition(i, n) + } + } + if len(n.Results) == 0 { + // Uses of named result variables are implicit here. + for _, x := range namedRets { + if i := tracking(x); i != nil { + addTransition(i, n) + } + } + } + case ir.OCALLFUNC: + n := n.(*ir.CallExpr) + for idx, arg := range n.Args { + if i := tracking(arg); i != nil { + if !argLeak(n, idx) { + // Passing s to a nonescaping arg is ok. + i.okUses++ + i.capUsed = true + } + } + } + case ir.ORANGE: + // Range over slice is ok. + n := n.(*ir.RangeStmt) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OAS: + n := n.(*ir.AssignStmt) + assign(n.X, n.Y, n) + case ir.OAS2: + n := n.(*ir.AssignListStmt) + for i := range len(n.Lhs) { + assign(n.Lhs[i], n.Rhs[i], n) + } + case ir.OCLOSURE: + n := n.(*ir.ClosureExpr) + for _, v := range n.Func.ClosureVars { + do(v.Outer) + } + } + if n.Op() == ir.OFOR || n.Op() == ir.ORANGE { + // Note: loopDepth isn't really right for init portion + // of the for statement, but that's ok. Correctness + // does not depend on depth info. + loopDepth++ + defer func() { loopDepth-- }() + } + // Check all the children. + ir.DoChildren(n, do) + return false + } + + // Run the analysis over the whole body. + for _, stmt := range fn.Body { + do(stmt) + } + + // Process accumulated info to find slice variables + // that we can allocate on the stack. + for _, s := range fn.Dcl { + if s.Opt == nil { + continue + } + i := s.Opt.(*sliceInfo) + s.Opt = nil + if i.okUses != i.allUses { + // Some use of i.s that we don't understand lurks. Give up. + continue + } + + // At this point, we've decided that we *can* do + // the optimization. + + if i.transition == nil { + // Exclusive for its whole lifetime. That means it + // didn't escape. We can already handle nonescaping + // slices without this pass. + continue + } + if i.appendWeight < 2 { + // This optimization only really helps if there is + // (dynamically) more than one append. + continue + } + + // Commit point - at this point we've decided we *should* + // do the optimization. + + // Insert a move2heap operation before the exclusive->nonexclusive + // transition. + move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s) + if i.capUsed { + move.PreserveCapacity = true + } + move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0]) + move.SetType(i.s.Type()) + move.SetTypecheck(1) + as := ir.NewAssignStmt(i.transition.Pos(), i.s, move) + as.SetTypecheck(1) + i.transition.PtrInit().Prepend(as) + // Note: we prepend because we need to put the move2heap + // operation first, before any other init work, as the transition + // might occur in the init work. + + // Now that we've inserted a move2heap operation before every + // exclusive -> nonexclusive transition, appends can now use + // stack backing stores. + // (This is the whole point of this pass, to enable stack + // allocation of append backing stores.) + for _, a := range i.appends { + a.SetEsc(ir.EscNone) + if i.capUsed { + a.UseBuf = true + } + } + } +} + +// argLeak reports whether the idx'th argument to the call n escapes anywhere +// (to the heap, another argument, return value, etc.). +// If unknown, it reports true.
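+// For example, for a call f(p, q) to some function f (hypothetical names) whose
+// escape analysis summary says p does not escape but q leaks to the heap,
+// argLeak reports false for index 0 and true for index 1.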
+func argLeak(n *ir.CallExpr, idx int) bool { + if n.Op() != ir.OCALLFUNC { + return true + } + fn := ir.StaticCalleeName(ir.StaticValue(n.Fun)) + if fn == nil { + return true + } + fntype := fn.Type() + if recv := fntype.Recv(); recv != nil { + if idx == 0 { + return escape.ParseLeaks(recv.Note).Any() + } + idx-- + } + return escape.ParseLeaks(fntype.Params()[idx].Note).Any() +} diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index e42b54398db..b23ccc4a70a 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -118,6 +118,7 @@ func init() { gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sp2 = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}} @@ -262,7 +263,7 @@ func init() { {name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, - {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, + {name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 944e1d78548..f13373d2c05 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -7643,7 +7643,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 3dea733bbdb..96be8ddd864 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -124,6 +124,11 @@ func InitConfig() { ir.Syms.GCWriteBarrier[7] = typecheck.LookupRuntimeFunc("gcWriteBarrier8") ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded") ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice") + ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf") + ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice") + ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan") + ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap") + ir.Syms.MoveSliceNoCapNoScan = typecheck.LookupRuntimeFunc("moveSliceNoCapNoScan") ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch") for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ { 
ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i)) @@ -1091,6 +1096,23 @@ type state struct { // Block starting position, indexed by block id. blockStarts []src.XPos + + // Information for stack allocation. Indexed by the first argument + // to an append call. Normally a slice-typed variable, but not always. + backingStores map[ir.Node]*backingStoreInfo +} + +type backingStoreInfo struct { + // Size of backing store array (in elements) + K int64 + // Stack-allocated backing store variable. + store *ir.Name + // Dynamic boolean variable marking the fact that we used this backing store. + used *ir.Name + // Have we used this variable statically yet? This is just a hint + // to avoid checking the dynamic variable if the answer is obvious. + // (usedStatic == true implies used == true) + usedStatic bool } type funcLine struct { @@ -3673,6 +3695,9 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value { case ir.OAPPEND: return s.append(n.(*ir.CallExpr), false) + case ir.OMOVE2HEAP: + return s.move2heap(n.(*ir.MoveToHeapExpr)) + case ir.OMIN, ir.OMAX: return s.minMax(n.(*ir.CallExpr)) @@ -3734,6 +3759,68 @@ func (s *state) resultAddrOfCall(c *ssa.Value, which int64, t *types.Type) *ssa. return addr } +// Get backing store information for an append call. +func (s *state) getBackingStoreInfoForAppend(n *ir.CallExpr) *backingStoreInfo { + if n.Esc() != ir.EscNone { + return nil + } + return s.getBackingStoreInfo(n.Args[0]) +} +func (s *state) getBackingStoreInfo(n ir.Node) *backingStoreInfo { + t := n.Type() + et := t.Elem() + maxStackSize := int64(base.Debug.VariableMakeThreshold) + if et.Size() == 0 || et.Size() > maxStackSize { + return nil + } + if base.Flag.N != 0 { + return nil + } + if !base.VariableMakeHash.MatchPos(n.Pos(), nil) { + return nil + } + i := s.backingStores[n] + if i != nil { + return i + } + + // Build type of backing store. + K := maxStackSize / et.Size() // rounds down + KT := types.NewArray(et, K) + KT.SetNoalg(true) + types.CalcArraySize(KT) + // Align more than naturally for the type KT. See issue 73199. + align := types.NewArray(types.Types[types.TUINTPTR], 0) + types.CalcArraySize(align) + storeTyp := types.NewStruct([]*types.Field{ + {Sym: types.BlankSym, Type: align}, + {Sym: types.BlankSym, Type: KT}, + }) + storeTyp.SetNoalg(true) + types.CalcStructSize(storeTyp) + + // Make backing store variable. + backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) + backingStore.SetAddrtaken(true) + + // Make "used" boolean. + used := typecheck.TempAt(n.Pos(), s.curfn, types.Types[types.TBOOL]) + if s.curBlock == s.f.Entry { + s.vars[used] = s.constBool(false) + } else { + // initialize this variable at end of entry block + s.defvars[s.f.Entry.ID][used] = s.constBool(false) + } + + // Initialize an info structure. + if s.backingStores == nil { + s.backingStores = map[ir.Node]*backingStoreInfo{} + } + i = &backingStoreInfo{K: K, store: backingStore, used: used, usedStatic: false} + s.backingStores[n] = i + return i +} + // append converts an OAPPEND node to SSA. // If inplace is false, it converts the OAPPEND expression n to an ssa.Value, // adds it to s, and returns the Value. @@ -3824,9 +3911,29 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // A stack-allocated backing store could be used at every // append that qualifies, but we limit it in some cases to // avoid wasted code and stack space. - // TODO: handle ... append case. 
- maxStackSize := int64(base.Debug.VariableMakeThreshold) - if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] { + // + // Note that we have two different strategies. + // 1. The standard strategy is just to allocate the full + // backing store at the first append. + // 2. An alternate strategy is used when + // a. The backing store eventually escapes via move2heap + // and b. The capacity is used somehow + // In this case, we don't want to just allocate + // the full buffer at the first append, because when + // we move2heap the buffer to the heap when it escapes, + // we might end up wasting memory because we can't + // change the capacity. + // So in this case we use growsliceBuf to reuse the buffer + // and walk one step up the size class ladder each time. + // + // TODO: handle ... append case? Currently we handle only + // a fixed number of appended elements. + var info *backingStoreInfo + if !inplace { + info = s.getBackingStoreInfoForAppend(n) + } + + if !inplace && info != nil && !n.UseBuf && !info.usedStatic { // if l <= K { // if !used { // if oldLen == 0 { @@ -3850,43 +3957,19 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // It is ok to do it more often, but it is probably helpful only for // the first instance. TODO: this could use more tuning. Using ir.Node // as the key works for *ir.Name instances but probably nothing else. - if s.appendTargets == nil { - s.appendTargets = map[ir.Node]bool{} - } - s.appendTargets[sn] = true - - K := maxStackSize / et.Size() // rounds down - KT := types.NewArray(et, K) - KT.SetNoalg(true) - types.CalcArraySize(KT) - // Align more than naturally for the type KT. See issue 73199. - align := types.NewArray(types.Types[types.TUINTPTR], 0) - types.CalcArraySize(align) - storeTyp := types.NewStruct([]*types.Field{ - {Sym: types.BlankSym, Type: align}, - {Sym: types.BlankSym, Type: KT}, - }) - storeTyp.SetNoalg(true) - types.CalcStructSize(storeTyp) + info.usedStatic = true + // TODO: unset usedStatic somehow? usedTestBlock := s.f.NewBlock(ssa.BlockPlain) oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain) bodyBlock := s.f.NewBlock(ssa.BlockPlain) growSlice := s.f.NewBlock(ssa.BlockPlain) - - // Make "used" boolean. - tBool := types.Types[types.TBOOL] - used := typecheck.TempAt(n.Pos(), s.curfn, tBool) - s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry - - // Make backing store variable. 
tInt := types.Types[types.TINT] - backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) - backingStore.SetAddrtaken(true) + tBool := types.Types[types.TBOOL] // if l <= K s.startBlock(grow) - kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K)) + kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, info.K)) b := s.endBlock() b.Kind = ssa.BlockIf b.SetControl(kTest) @@ -3896,7 +3979,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // if !used s.startBlock(usedTestBlock) - usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used)) + usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(info.used)) b = s.endBlock() b.Kind = ssa.BlockIf b.SetControl(usedTest) @@ -3917,18 +4000,18 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // var store struct { _ [0]uintptr; arr [K]T } s.startBlock(bodyBlock) if et.HasPointers() { - s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem()) + s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, info.store, s.mem()) } - addr := s.addr(backingStore) - s.zero(storeTyp, addr) + addr := s.addr(info.store) + s.zero(info.store.Type(), addr) // s = store.arr[:l:K] s.vars[ptrVar] = addr s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test. - s.vars[capVar] = s.constInt(tInt, K) + s.vars[capVar] = s.constInt(tInt, info.K) // used = true - s.assign(used, s.constBool(true), false, 0) + s.assign(info.used, s.constBool(true), false, 0) b = s.endBlock() b.AddEdgeTo(assign) @@ -3939,7 +4022,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // Call growslice s.startBlock(grow) taddr := s.expr(n.Fun) - r := s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + var r []*ssa.Value + if info != nil && n.UseBuf { + // Use stack-allocated buffer as backing store, if we can. + if et.HasPointers() && !info.usedStatic { + // Initialize in the function header. Not the best place, + // but it makes sure we don't scan this area before it is + // initialized. + mem := s.defvars[s.f.Entry.ID][memVar] + mem = s.f.Entry.NewValue1A(n.Pos(), ssa.OpVarDef, types.TypeMem, info.store, mem) + addr := s.f.Entry.NewValue2A(n.Pos(), ssa.OpLocalAddr, types.NewPtr(info.store.Type()), info.store, s.sp, mem) + mem = s.f.Entry.NewValue2I(n.Pos(), ssa.OpZero, types.TypeMem, info.store.Type().Size(), addr, mem) + mem.Aux = info.store.Type() + s.defvars[s.f.Entry.ID][memVar] = mem + info.usedStatic = true + } + r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K)) + } else { + r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + } // Decompose output slice p = s.newValue1(ssa.OpSlicePtr, pt, r[0]) @@ -4026,6 +4127,95 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { return s.newValue3(ssa.OpSliceMake, n.Type(), p, l, c) } +func (s *state) move2heap(n *ir.MoveToHeapExpr) *ssa.Value { + // s := n.Slice + // if s.ptr points to current stack frame { + // s2 := make([]T, s.len, s.cap) + // copy(s2[:cap], s[:cap]) + // s = s2 + // } + // return s + + slice := s.expr(n.Slice) + et := slice.Type.Elem() + pt := types.NewPtr(et) + + info := s.getBackingStoreInfo(n) + if info == nil { + // Backing store will never be stack allocated, so + // move2heap is a no-op. + return slice + } + + // Decompose input slice.
+ p := s.newValue1(ssa.OpSlicePtr, pt, slice) + l := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice) + c := s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], slice) + + moveBlock := s.f.NewBlock(ssa.BlockPlain) + mergeBlock := s.f.NewBlock(ssa.BlockPlain) + + s.vars[ptrVar] = p + s.vars[lenVar] = l + s.vars[capVar] = c + + // Decide if we need to move the slice backing store. + // It needs to be moved if it is currently on the stack. + sub := ssa.OpSub64 + less := ssa.OpLess64U + if s.config.PtrSize == 4 { + sub = ssa.OpSub32 + less = ssa.OpLess32U + } + callerSP := s.newValue1(ssa.OpGetCallerSP, types.Types[types.TUINTPTR], s.mem()) + frameSize := s.newValue2(sub, types.Types[types.TUINTPTR], callerSP, s.sp) + pInt := s.newValue2(ssa.OpConvert, types.Types[types.TUINTPTR], p, s.mem()) + off := s.newValue2(sub, types.Types[types.TUINTPTR], pInt, s.sp) + cond := s.newValue2(less, types.Types[types.TBOOL], off, frameSize) + + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Likely = ssa.BranchUnlikely // fast path is to not have to call into runtime + b.SetControl(cond) + b.AddEdgeTo(moveBlock) + b.AddEdgeTo(mergeBlock) + + // Move the slice to heap + s.startBlock(moveBlock) + var newSlice *ssa.Value + if et.HasPointers() { + typ := s.expr(n.RType) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSlice, true, []*types.Type{slice.Type}, typ, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCap, true, []*types.Type{slice.Type}, typ, p, l)[0] + } + } else { + elemSize := s.constInt(types.Types[types.TUINTPTR], et.Size()) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSliceNoScan, true, []*types.Type{slice.Type}, elemSize, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCapNoScan, true, []*types.Type{slice.Type}, elemSize, p, l)[0] + } + } + // Decompose output slice + s.vars[ptrVar] = s.newValue1(ssa.OpSlicePtr, pt, newSlice) + s.vars[lenVar] = s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], newSlice) + s.vars[capVar] = s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], newSlice) + b = s.endBlock() + b.AddEdgeTo(mergeBlock) + + // Merge fast path (no moving) and slow path (moved) + s.startBlock(mergeBlock) + p = s.variable(ptrVar, pt) // generates phi for ptr + l = s.variable(lenVar, types.Types[types.TINT]) // generates phi for len + c = s.variable(capVar, types.Types[types.TINT]) // generates phi for cap + delete(s.vars, ptrVar) + delete(s.vars, lenVar) + delete(s.vars, capVar) + return s.newValue3(ssa.OpSliceMake, slice.Type, p, l, c) +} + // minMax converts an OMIN/OMAX builtin call into SSA. 
func (s *state) minMax(n *ir.CallExpr) *ssa.Value { // The OMIN/OMAX builtin is variadic, but its semantics are diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go index fbe8f77abd4..7988ebf5b93 100644 --- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go +++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go @@ -195,6 +195,7 @@ func makeslice(typ *byte, len int, cap int) unsafe.Pointer func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any) +func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any) func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64) func panicunsafeslicelen() func panicunsafeslicenilptr() @@ -202,6 +203,11 @@ func unsafestringcheckptr(ptr unsafe.Pointer, len int64) func panicunsafestringlen() func panicunsafestringnilptr() +func moveSlice(typ *byte, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoScan(elemSize uintptr, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoCap(typ *byte, old *byte, len int) (*byte, int, int) +func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int) + func memmove(to *any, frm *any, length uintptr) func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) func memclrHasPointers(ptr unsafe.Pointer, n uintptr) diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go index ff72bdcf373..ee892856dd9 100644 --- a/src/cmd/compile/internal/typecheck/builtin.go +++ b/src/cmd/compile/internal/typecheck/builtin.go @@ -160,80 +160,85 @@ var runtimeDecls = [...]struct { {"makeslice64", funcTag, 124}, {"makeslicecopy", funcTag, 125}, {"growslice", funcTag, 127}, - {"unsafeslicecheckptr", funcTag, 128}, + {"growsliceBuf", funcTag, 128}, + {"unsafeslicecheckptr", funcTag, 129}, {"panicunsafeslicelen", funcTag, 9}, {"panicunsafeslicenilptr", funcTag, 9}, - {"unsafestringcheckptr", funcTag, 129}, + {"unsafestringcheckptr", funcTag, 130}, {"panicunsafestringlen", funcTag, 9}, {"panicunsafestringnilptr", funcTag, 9}, - {"memmove", funcTag, 130}, - {"memclrNoHeapPointers", funcTag, 131}, - {"memclrHasPointers", funcTag, 131}, - {"memequal", funcTag, 132}, - {"memequal0", funcTag, 133}, - {"memequal8", funcTag, 133}, - {"memequal16", funcTag, 133}, - {"memequal32", funcTag, 133}, - {"memequal64", funcTag, 133}, - {"memequal128", funcTag, 133}, - {"f32equal", funcTag, 134}, - {"f64equal", funcTag, 134}, - {"c64equal", funcTag, 134}, - {"c128equal", funcTag, 134}, - {"strequal", funcTag, 134}, - {"interequal", funcTag, 134}, - {"nilinterequal", funcTag, 134}, - {"memhash", funcTag, 135}, - {"memhash0", funcTag, 136}, - {"memhash8", funcTag, 136}, - {"memhash16", funcTag, 136}, - {"memhash32", funcTag, 136}, - {"memhash64", funcTag, 136}, - {"memhash128", funcTag, 136}, - {"f32hash", funcTag, 137}, - {"f64hash", funcTag, 137}, - {"c64hash", funcTag, 137}, - {"c128hash", funcTag, 137}, - {"strhash", funcTag, 137}, - {"interhash", funcTag, 137}, - {"nilinterhash", funcTag, 137}, - {"int64div", funcTag, 138}, - {"uint64div", funcTag, 139}, - {"int64mod", funcTag, 138}, - {"uint64mod", funcTag, 139}, - {"float64toint64", funcTag, 140}, - {"float64touint64", funcTag, 141}, - {"float64touint32", funcTag, 142}, - {"int64tofloat64", funcTag, 143}, - {"int64tofloat32", 
funcTag, 144}, - {"uint64tofloat64", funcTag, 145}, - {"uint64tofloat32", funcTag, 146}, - {"uint32tofloat64", funcTag, 147}, - {"complex128div", funcTag, 148}, + {"moveSlice", funcTag, 131}, + {"moveSliceNoScan", funcTag, 132}, + {"moveSliceNoCap", funcTag, 133}, + {"moveSliceNoCapNoScan", funcTag, 134}, + {"memmove", funcTag, 135}, + {"memclrNoHeapPointers", funcTag, 136}, + {"memclrHasPointers", funcTag, 136}, + {"memequal", funcTag, 137}, + {"memequal0", funcTag, 138}, + {"memequal8", funcTag, 138}, + {"memequal16", funcTag, 138}, + {"memequal32", funcTag, 138}, + {"memequal64", funcTag, 138}, + {"memequal128", funcTag, 138}, + {"f32equal", funcTag, 139}, + {"f64equal", funcTag, 139}, + {"c64equal", funcTag, 139}, + {"c128equal", funcTag, 139}, + {"strequal", funcTag, 139}, + {"interequal", funcTag, 139}, + {"nilinterequal", funcTag, 139}, + {"memhash", funcTag, 140}, + {"memhash0", funcTag, 141}, + {"memhash8", funcTag, 141}, + {"memhash16", funcTag, 141}, + {"memhash32", funcTag, 141}, + {"memhash64", funcTag, 141}, + {"memhash128", funcTag, 141}, + {"f32hash", funcTag, 142}, + {"f64hash", funcTag, 142}, + {"c64hash", funcTag, 142}, + {"c128hash", funcTag, 142}, + {"strhash", funcTag, 142}, + {"interhash", funcTag, 142}, + {"nilinterhash", funcTag, 142}, + {"int64div", funcTag, 143}, + {"uint64div", funcTag, 144}, + {"int64mod", funcTag, 143}, + {"uint64mod", funcTag, 144}, + {"float64toint64", funcTag, 145}, + {"float64touint64", funcTag, 146}, + {"float64touint32", funcTag, 147}, + {"int64tofloat64", funcTag, 148}, + {"int64tofloat32", funcTag, 149}, + {"uint64tofloat64", funcTag, 150}, + {"uint64tofloat32", funcTag, 151}, + {"uint32tofloat64", funcTag, 152}, + {"complex128div", funcTag, 153}, {"racefuncenter", funcTag, 33}, {"racefuncexit", funcTag, 9}, {"raceread", funcTag, 33}, {"racewrite", funcTag, 33}, - {"racereadrange", funcTag, 149}, - {"racewriterange", funcTag, 149}, - {"msanread", funcTag, 149}, - {"msanwrite", funcTag, 149}, - {"msanmove", funcTag, 150}, - {"asanread", funcTag, 149}, - {"asanwrite", funcTag, 149}, - {"checkptrAlignment", funcTag, 151}, - {"checkptrArithmetic", funcTag, 153}, - {"libfuzzerTraceCmp1", funcTag, 154}, - {"libfuzzerTraceCmp2", funcTag, 155}, - {"libfuzzerTraceCmp4", funcTag, 156}, - {"libfuzzerTraceCmp8", funcTag, 157}, - {"libfuzzerTraceConstCmp1", funcTag, 154}, - {"libfuzzerTraceConstCmp2", funcTag, 155}, - {"libfuzzerTraceConstCmp4", funcTag, 156}, - {"libfuzzerTraceConstCmp8", funcTag, 157}, - {"libfuzzerHookStrCmp", funcTag, 158}, - {"libfuzzerHookEqualFold", funcTag, 158}, - {"addCovMeta", funcTag, 160}, + {"racereadrange", funcTag, 154}, + {"racewriterange", funcTag, 154}, + {"msanread", funcTag, 154}, + {"msanwrite", funcTag, 154}, + {"msanmove", funcTag, 155}, + {"asanread", funcTag, 154}, + {"asanwrite", funcTag, 154}, + {"checkptrAlignment", funcTag, 156}, + {"checkptrArithmetic", funcTag, 158}, + {"libfuzzerTraceCmp1", funcTag, 159}, + {"libfuzzerTraceCmp2", funcTag, 160}, + {"libfuzzerTraceCmp4", funcTag, 161}, + {"libfuzzerTraceCmp8", funcTag, 162}, + {"libfuzzerTraceConstCmp1", funcTag, 159}, + {"libfuzzerTraceConstCmp2", funcTag, 160}, + {"libfuzzerTraceConstCmp4", funcTag, 161}, + {"libfuzzerTraceConstCmp8", funcTag, 162}, + {"libfuzzerHookStrCmp", funcTag, 163}, + {"libfuzzerHookEqualFold", funcTag, 163}, + {"addCovMeta", funcTag, 165}, {"x86HasPOPCNT", varTag, 6}, {"x86HasSSE41", varTag, 6}, {"x86HasFMA", varTag, 6}, @@ -243,11 +248,11 @@ var runtimeDecls = [...]struct { {"loong64HasLAM_BH", varTag, 6}, {"loong64HasLSX", 
varTag, 6}, {"riscv64HasZbb", varTag, 6}, - {"asanregisterglobals", funcTag, 131}, + {"asanregisterglobals", funcTag, 136}, } func runtimeTypes() []*types.Type { - var typs [161]*types.Type + var typs [166]*types.Type typs[0] = types.ByteType typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[types.TANY] @@ -376,39 +381,44 @@ func runtimeTypes() []*types.Type { typs[125] = newSig(params(typs[1], typs[13], typs[13], typs[7]), params(typs[7])) typs[126] = types.NewSlice(typs[2]) typs[127] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1]), params(typs[126])) - typs[128] = newSig(params(typs[1], typs[7], typs[22]), nil) - typs[129] = newSig(params(typs[7], typs[22]), nil) - typs[130] = newSig(params(typs[3], typs[3], typs[5]), nil) - typs[131] = newSig(params(typs[7], typs[5]), nil) - typs[132] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) - typs[133] = newSig(params(typs[3], typs[3]), params(typs[6])) - typs[134] = newSig(params(typs[7], typs[7]), params(typs[6])) - typs[135] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) - typs[136] = newSig(params(typs[7], typs[5]), params(typs[5])) - typs[137] = newSig(params(typs[3], typs[5]), params(typs[5])) - typs[138] = newSig(params(typs[22], typs[22]), params(typs[22])) - typs[139] = newSig(params(typs[24], typs[24]), params(typs[24])) - typs[140] = newSig(params(typs[18]), params(typs[22])) - typs[141] = newSig(params(typs[18]), params(typs[24])) - typs[142] = newSig(params(typs[18]), params(typs[67])) - typs[143] = newSig(params(typs[22]), params(typs[18])) - typs[144] = newSig(params(typs[22]), params(typs[20])) - typs[145] = newSig(params(typs[24]), params(typs[18])) - typs[146] = newSig(params(typs[24]), params(typs[20])) - typs[147] = newSig(params(typs[67]), params(typs[18])) - typs[148] = newSig(params(typs[26], typs[26]), params(typs[26])) - typs[149] = newSig(params(typs[5], typs[5]), nil) - typs[150] = newSig(params(typs[5], typs[5], typs[5]), nil) - typs[151] = newSig(params(typs[7], typs[1], typs[5]), nil) - typs[152] = types.NewSlice(typs[7]) - typs[153] = newSig(params(typs[7], typs[152]), nil) - typs[154] = newSig(params(typs[71], typs[71], typs[15]), nil) - typs[155] = newSig(params(typs[65], typs[65], typs[15]), nil) - typs[156] = newSig(params(typs[67], typs[67], typs[15]), nil) - typs[157] = newSig(params(typs[24], typs[24], typs[15]), nil) - typs[158] = newSig(params(typs[30], typs[30], typs[15]), nil) - typs[159] = types.NewArray(typs[0], 16) - typs[160] = newSig(params(typs[7], typs[67], typs[159], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) + typs[128] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1], typs[3], typs[13]), params(typs[126])) + typs[129] = newSig(params(typs[1], typs[7], typs[22]), nil) + typs[130] = newSig(params(typs[7], typs[22]), nil) + typs[131] = newSig(params(typs[1], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[132] = newSig(params(typs[5], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[133] = newSig(params(typs[1], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[134] = newSig(params(typs[5], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[135] = newSig(params(typs[3], typs[3], typs[5]), nil) + typs[136] = newSig(params(typs[7], typs[5]), nil) + typs[137] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) + typs[138] = newSig(params(typs[3], typs[3]), params(typs[6])) + typs[139] = newSig(params(typs[7], typs[7]), params(typs[6])) + 
typs[140] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) + typs[141] = newSig(params(typs[7], typs[5]), params(typs[5])) + typs[142] = newSig(params(typs[3], typs[5]), params(typs[5])) + typs[143] = newSig(params(typs[22], typs[22]), params(typs[22])) + typs[144] = newSig(params(typs[24], typs[24]), params(typs[24])) + typs[145] = newSig(params(typs[18]), params(typs[22])) + typs[146] = newSig(params(typs[18]), params(typs[24])) + typs[147] = newSig(params(typs[18]), params(typs[67])) + typs[148] = newSig(params(typs[22]), params(typs[18])) + typs[149] = newSig(params(typs[22]), params(typs[20])) + typs[150] = newSig(params(typs[24]), params(typs[18])) + typs[151] = newSig(params(typs[24]), params(typs[20])) + typs[152] = newSig(params(typs[67]), params(typs[18])) + typs[153] = newSig(params(typs[26], typs[26]), params(typs[26])) + typs[154] = newSig(params(typs[5], typs[5]), nil) + typs[155] = newSig(params(typs[5], typs[5], typs[5]), nil) + typs[156] = newSig(params(typs[7], typs[1], typs[5]), nil) + typs[157] = types.NewSlice(typs[7]) + typs[158] = newSig(params(typs[7], typs[157]), nil) + typs[159] = newSig(params(typs[71], typs[71], typs[15]), nil) + typs[160] = newSig(params(typs[65], typs[65], typs[15]), nil) + typs[161] = newSig(params(typs[67], typs[67], typs[15]), nil) + typs[162] = newSig(params(typs[24], typs[24], typs[15]), nil) + typs[163] = newSig(params(typs[30], typs[30], typs[15]), nil) + typs[164] = types.NewArray(typs[0], 16) + typs[165] = newSig(params(typs[7], typs[67], typs[164], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) return typs[:] } diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index 989ae0a1db2..2794671c73b 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -351,6 +351,11 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node { case ir.OMETHVALUE: return walkMethodValue(n.(*ir.SelectorExpr), init) + + case ir.OMOVE2HEAP: + n := n.(*ir.MoveToHeapExpr) + n.Slice = walkExpr(n.Slice, init) + return n } // No return! Each case must return (or panic), diff --git a/src/runtime/slice.go b/src/runtime/slice.go index e31d5dccb24..a9e8fc16109 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte { cap := roundupsize(uintptr(len), true) return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len] } + +// moveSlice copies the input slice to the heap and returns it. +// et is the element type of the slice. +func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * et.Size_ + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoScan is like moveSlice except the element type is known to +// not have any pointers. We instead pass in the size of the element. +func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * elemSize + new := mallocgc(capmem, nil, false) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity +// for the returned slice. 
+// Elements between len and cap in the returned slice will be zeroed. +func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * et.Size_ + capmem := roundupsize(lenmem, false) + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et) + memmove(new, old, lenmem) + return new, len, int(capmem / et.Size_) +} + +// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap. +func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * elemSize + capmem := roundupsize(lenmem, true) + new := mallocgc(capmem, nil, false) + memmove(new, old, lenmem) + if capmem > lenmem { + memclrNoHeapPointers(add(new, lenmem), capmem-lenmem) + } + return new, len, int(capmem / elemSize) +} + +// growsliceBuf is like growslice, but we can use the given buffer +// as a backing store if we want. bufPtr must be on the stack. +func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice { + if newLen > bufLen { + // Doesn't fit, process like a normal growslice. + return growslice(oldPtr, newLen, oldCap, num, et) + } + oldLen := newLen - num + if oldPtr != bufPtr && oldLen != 0 { + // Move data to start of buffer. + // Note: bufPtr is on the stack, so no write barrier needed. + memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_) + } + // Pick a new capacity. + // + // Unlike growslice, we don't need to double the size each time. + // The work done here is not proportional to the length of the slice. + // (Unless the memmove happens above, but that is rare, and in any + // case there are not many elements on this path.) + // + // Instead, we try to just bump up to the next size class. + // This will ensure that we don't waste any space when we eventually + // call moveSlice with the resulting slice. + newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_) + + // Zero slice beyond newLen. + // The buffer is stack memory, so NoHeapPointers is ok. + // Caller will overwrite [oldLen:newLen], so we don't need to zero that portion. + // If et.Pointers(), buffer is at least initialized so we don't need to + // worry about the caller overwriting junk in [oldLen:newLen]. 
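+ // For example, appending the 9th byte to a []byte backed by this buffer
+ // rounds newCap up to the next size class, 16, so buf[9:16] is zeroed here.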
+ if newLen < newCap { + memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_) + } + + return slice{bufPtr, newLen, newCap} +} diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index cd2bc26d1eb..5463b6c02fb 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -6,6 +6,9 @@ package runtime_test import ( "fmt" + "internal/race" + "internal/testenv" + "runtime" "testing" ) @@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) { }) } + +//go:noinline +func byteSlice(n int) []byte { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r +} +func TestAppendByteInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48. + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r = byteSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func ptrSlice(n int, p *[]*byte) { + var r []*byte + for range n { + r = append(r, nil) + } + *p = r +} +func TestAppendPtrInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + var tests [][3]int + if runtime.PtrSize == 8 { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 1}, + {2, 1, 2}, + {3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32. + {4, 1, 4}, + {5, 1, 8}, + {6, 1, 8}, + {7, 1, 8}, + {8, 1, 8}, + {9, 2, 16}, + } + } else { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 2}, + {2, 1, 2}, + {3, 1, 4}, + {4, 1, 4}, + {5, 1, 6}, // These two are also 24 bytes instead of 32. 
+ {6, 1, 6}, // + {7, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {10, 1, 16}, + {11, 1, 16}, + {12, 1, 16}, + {13, 1, 16}, + {14, 1, 16}, + {15, 1, 16}, + {16, 1, 16}, + {17, 2, 32}, + } + } + for _, test := range tests { + n := test[0] + want := test[1] + wantCap := test[2] + var r []*byte + got := testing.AllocsPerRun(10, func() { + ptrSlice(n, &r) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func byteCapSlice(n int) ([]byte, int) { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r, cap(r) +} +func TestAppendByteCapInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r, _ = byteCapSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +func TestAppendGeneric(t *testing.T) { + type I *int + r := testAppendGeneric[I](100) + if len(r) != 100 { + t.Errorf("bad length") + } +} + +//go:noinline +func testAppendGeneric[E any](n int) []E { + var r []E + var z E + for range n { + r = append(r, z) + } + return r +} + +func appendSomeBytes(r []byte, s []byte) []byte { + for _, b := range s { + r = append(r, b) + } + return r +} + +func TestAppendOfArg(t *testing.T) { + r := make([]byte, 24) + for i := 0; i < 24; i++ { + r[i] = byte(i) + } + appendSomeBytes(r, []byte{25, 26, 27}) + // Do the same thing, trying to overwrite any + // stack-allocated buffers used above. + s := make([]byte, 24) + for i := 0; i < 24; i++ { + s[i] = 99 + } + appendSomeBytes(s, []byte{99, 99, 99}) + // Check that we still have the right data. + for i, b := range r { + if b != byte(i) { + t.Errorf("r[%d]=%d, want %d", i, b, byte(i)) + } + } + +} + +func BenchmarkAppendInLoop(b *testing.B) { + for _, size := range []int{0, 1, 8, 16, 32, 64, 128} { + b.Run(fmt.Sprintf("%d", size), + func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + byteSlice(size) + } + }) + } +} + +func TestMoveToHeapEarly(t *testing.T) { + // Just checking that this compiles. 
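+	// The assignment y := x below causes the move-to-heap to be
+	// emitted in the function entry block, before any of the appends.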
+ var x []int + y := x // causes a move2heap in the entry block + for range 5 { + x = append(x, 5) + } + _ = y +} + +func TestMoveToHeapCap(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = cap(s) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func runit(f func()) { + f() +} + +func TestMoveToHeapClosure1(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + runit(func() { + c = cap(s) + }) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} +func TestMoveToHeapClosure2(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = func() int { + return cap(s) + }() + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func buildClosure(t *testing.T) ([]byte, func()) { + var s []byte + for i := range 20 { + s = append(s, byte(i)) + } + c := func() { + for i, b := range s { + if b != byte(i) { + t.Errorf("s[%d]=%d, want %d", i, b, i) + } + } + } + return s, c +} + +func TestMoveToHeapClosure3(t *testing.T) { + _, f := buildClosure(t) + overwriteStack(0) + f() +} + +//go:noinline +func overwriteStack(n int) uint64 { + var x [100]uint64 + for i := range x { + x[i] = 0xabcdabcdabcdabcd + } + return x[n] +} + +var sinkSlice []byte diff --git a/test/codegen/append.go b/test/codegen/append.go new file mode 100644 index 00000000000..0e58a48c458 --- /dev/null +++ b/test/codegen/append.go @@ -0,0 +1,190 @@ +// asmcheck + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package codegen
+
+func Append1(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append2(n int) (r []int) {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return
+}
+
+func Append3(n int) (r []int) {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append4(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	println(cap(r))
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append5(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	useSlice(r)
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append6(n int) []*int {
+	var r []*int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, &i)
+	}
+	// amd64:`.*moveSliceNoCap`
+	return r
+}
+
+func Append7(n int) []*int {
+	var r []*int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, &i)
+	}
+	println(cap(r))
+	// amd64:`.*moveSlice`
+	return r
+}
+
+func Append8(n int, p *[]int) {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	*p = r
+}
+
+func Append9(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	println(len(r))
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append10(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	println(r[3])
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append11(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	r = r[3:5]
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append12(n int) []int {
+	var r []int
+	r = nil
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append13(n int) []int {
+	var r []int
+	r, r = nil, nil
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append14(n int) []int {
+	var r []int
+	r = []int{3, 4, 5}
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append15(n int) []int {
+	r := []int{3, 4, 5}
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append16(r []int, n int) []int {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append17(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	for i, x := range r {
+		println(i, x)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+//go:noinline
+func useSlice(s []int) {
+}
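
For readers following the codegen checks above, here is a hand-written, source-level sketch of the idea these tests exercise: append into a small stack buffer while the slice stays purely local, then copy exactly the used length to the heap at the point where the slice escapes. This is illustrative only, not the compiler's actual lowering; the 32-byte buffer size is taken from the comment in TestAppendByteInLoop, the function and variable names are made up, and the sketch does not reproduce the exact allocation counts or capacities the tests assert.

package main

// byteSliceModel models what the compiled byteSlice effectively does:
// grow into a stack buffer while the slice is local, and move the
// result to the heap only at the escape point (the return).
func byteSliceModel(n int) []byte {
	var buf [32]byte // stack scratch space, analogous to growsliceBuf's buffer
	r := buf[:0]
	for i := 0; i < n; i++ {
		if len(r) == cap(r) {
			// Buffer (or current backing) is full: fall back to the
			// ordinary heap growth path, abandoning the stack buffer.
			r = append(r, byte(i))
			continue
		}
		r = r[:len(r)+1]
		r[len(r)-1] = byte(i)
	}
	if len(r) == 0 {
		return nil
	}
	// Analogous to moveSliceNoCapNoScan: the result escapes, so copy it
	// into a right-sized heap backing store; buf itself never escapes.
	out := make([]byte, len(r))
	copy(out, r)
	return out
}

func main() {
	s := byteSliceModel(10)
	println(len(s), cap(s), s[9])
}

In the actual lowering, the loop keeps growing into the same stack buffer via growsliceBuf and the single moveSlice* call at the escape point does the final copy, which is why TestAppendByteInLoop expects one heap allocation for lengths that fit the buffered path and a capacity rounded up to a malloc size class.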