diff --git a/src/cmd/compile/internal/deadlocals/deadlocals.go b/src/cmd/compile/internal/deadlocals/deadlocals.go index 238450416a9..55ad0387a4d 100644 --- a/src/cmd/compile/internal/deadlocals/deadlocals.go +++ b/src/cmd/compile/internal/deadlocals/deadlocals.go @@ -44,6 +44,11 @@ func Funcs(fns []*ir.Func) { *as.lhs = ir.BlankNode *as.rhs = zero } + if len(assigns) > 0 { + // k.Defn might be pointing at one of the + // assignments we're overwriting. + k.Defn = nil + } } } } diff --git a/src/cmd/compile/internal/escape/leaks.go b/src/cmd/compile/internal/escape/leaks.go index 942f87d2a22..176bccd8470 100644 --- a/src/cmd/compile/internal/escape/leaks.go +++ b/src/cmd/compile/internal/escape/leaks.go @@ -124,3 +124,21 @@ func parseLeaks(s string) leaks { copy(l[:], s[4:]) return l } + +func ParseLeaks(s string) leaks { + return parseLeaks(s) +} + +// Any reports whether the value flows anywhere at all. +func (l leaks) Any() bool { + // TODO: do mutator/callee matter? + if l.Heap() >= 0 || l.Mutator() >= 0 || l.Callee() >= 0 { + return true + } + for i := range numEscResults { + if l.Result(i) >= 0 { + return true + } + } + return false +} diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go index 42e2afaee4f..ef6a5d6017c 100644 --- a/src/cmd/compile/internal/gc/main.go +++ b/src/cmd/compile/internal/gc/main.go @@ -22,6 +22,7 @@ import ( "cmd/compile/internal/pkginit" "cmd/compile/internal/reflectdata" "cmd/compile/internal/rttype" + "cmd/compile/internal/slice" "cmd/compile/internal/ssa" "cmd/compile/internal/ssagen" "cmd/compile/internal/staticinit" @@ -266,6 +267,8 @@ func Main(archInit func(*ssagen.ArchInfo)) { base.Timer.Start("fe", "escapes") escape.Funcs(typecheck.Target.Funcs) + slice.Funcs(typecheck.Target.Funcs) + loopvar.LogTransformations(transformed) // Collect information for go:nowritebarrierrec diff --git a/src/cmd/compile/internal/ir/expr.go b/src/cmd/compile/internal/ir/expr.go index 7a75ff40f2d..dd1b94aa0da 100644 --- a/src/cmd/compile/internal/ir/expr.go +++ b/src/cmd/compile/internal/ir/expr.go @@ -192,6 +192,7 @@ type CallExpr struct { IsDDD bool GoDefer bool // whether this call is part of a go or defer statement NoInline bool // whether this call must not be inlined + UseBuf bool // use stack buffer for backing store (OAPPEND only) } func NewCallExpr(pos src.XPos, op Op, fun Node, args []Node) *CallExpr { @@ -1269,3 +1270,28 @@ func MethodExprFunc(n Node) *types.Field { base.Fatalf("unexpected node: %v (%v)", n, n.Op()) panic("unreachable") } + +// A MoveToHeapExpr takes a slice as input and moves it to the +// heap (by copying the backing store if it is not already +// on the heap). +type MoveToHeapExpr struct { + miniExpr + Slice Node + // An expression that evaluates to a *runtime._type + // that represents the slice element type. + RType Node + // If PreserveCapacity is true, the capacity of + // the resulting slice, and all of the elements in + // [len:cap], must be preserved. + // If PreserveCapacity is false, the resulting + // slice may have any capacity >= len, with any + // elements in the resulting [len:cap] range zeroed. 
+ PreserveCapacity bool +} + +func NewMoveToHeapExpr(pos src.XPos, slice Node) *MoveToHeapExpr { + n := &MoveToHeapExpr{Slice: slice} + n.pos = pos + n.op = OMOVE2HEAP + return n +} diff --git a/src/cmd/compile/internal/ir/name.go b/src/cmd/compile/internal/ir/name.go index 01f1c0c5022..63f1b1c931c 100644 --- a/src/cmd/compile/internal/ir/name.go +++ b/src/cmd/compile/internal/ir/name.go @@ -43,7 +43,7 @@ type Name struct { Func *Func // TODO(austin): nil for I.M Offset_ int64 val constant.Value - Opt any // for use by escape analysis + Opt any // for use by escape or slice analysis Embed *[]Embed // list of embedded files, for ONAME var // For a local variable (not param) or extern, the initializing assignment (OAS or OAS2). diff --git a/src/cmd/compile/internal/ir/node.go b/src/cmd/compile/internal/ir/node.go index 8c61bb6ed5a..f26f61cb18a 100644 --- a/src/cmd/compile/internal/ir/node.go +++ b/src/cmd/compile/internal/ir/node.go @@ -293,6 +293,7 @@ const ( OLINKSYMOFFSET // offset within a name OJUMPTABLE // A jump table structure for implementing dense expression switches OINTERFACESWITCH // A type switch with interface cases + OMOVE2HEAP // Promote a stack-backed slice to heap // opcodes for generics ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter) diff --git a/src/cmd/compile/internal/ir/node_gen.go b/src/cmd/compile/internal/ir/node_gen.go index 2221045c93d..4298b3a43d7 100644 --- a/src/cmd/compile/internal/ir/node_gen.go +++ b/src/cmd/compile/internal/ir/node_gen.go @@ -1175,6 +1175,34 @@ func (n *MakeExpr) editChildrenWithHidden(edit func(Node) Node) { } } +func (n *MoveToHeapExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } +func (n *MoveToHeapExpr) copy() Node { + c := *n + c.init = copyNodes(c.init) + return &c +} +func (n *MoveToHeapExpr) doChildren(do func(Node) bool) bool { + if doNodes(n.init, do) { + return true + } + if n.Slice != nil && do(n.Slice) { + return true + } + return false +} +func (n *MoveToHeapExpr) doChildrenWithHidden(do func(Node) bool) bool { + return n.doChildren(do) +} +func (n *MoveToHeapExpr) editChildren(edit func(Node) Node) { + editNodes(n.init, edit) + if n.Slice != nil { + n.Slice = edit(n.Slice).(Node) + } +} +func (n *MoveToHeapExpr) editChildrenWithHidden(edit func(Node) Node) { + n.editChildren(edit) +} + func (n *Name) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } func (n *NilExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) } diff --git a/src/cmd/compile/internal/ir/op_string.go b/src/cmd/compile/internal/ir/op_string.go index 7494beee4c5..f042ad84a40 100644 --- a/src/cmd/compile/internal/ir/op_string.go +++ b/src/cmd/compile/internal/ir/op_string.go @@ -151,18 +151,19 @@ func _() { _ = x[OLINKSYMOFFSET-140] _ = x[OJUMPTABLE-141] _ = x[OINTERFACESWITCH-142] - _ = x[ODYNAMICDOTTYPE-143] - _ = x[ODYNAMICDOTTYPE2-144] - _ = x[ODYNAMICTYPE-145] - _ = x[OTAILCALL-146] - _ = x[OGETG-147] - _ = x[OGETCALLERSP-148] - _ = x[OEND-149] + _ = x[OMOVE2HEAP-143] + _ = x[ODYNAMICDOTTYPE-144] + _ = x[ODYNAMICDOTTYPE2-145] + _ = x[ODYNAMICTYPE-146] + _ = x[OTAILCALL-147] + _ = x[OGETG-148] + _ = x[OGETCALLERSP-149] + _ = x[OEND-150] } -const _Op_name = 
"XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" +const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHMOVE2HEAPDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND" -var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935} +var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 878, 892, 907, 918, 926, 930, 941, 944} func (i Op) String() string { if i >= Op(len(_Op_index)-1) { diff --git a/src/cmd/compile/internal/ir/stmt.go 
b/src/cmd/compile/internal/ir/stmt.go index 0801ecdd9e8..affa5f4551e 100644 --- a/src/cmd/compile/internal/ir/stmt.go +++ b/src/cmd/compile/internal/ir/stmt.go @@ -42,6 +42,7 @@ func (*Decl) isStmt() {} type Stmt interface { Node isStmt() + PtrInit() *Nodes } // A miniStmt is a miniNode with extra fields common to statements. diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go index f8eb4578809..828c3b553a6 100644 --- a/src/cmd/compile/internal/ir/symtab.go +++ b/src/cmd/compile/internal/ir/symtab.go @@ -29,6 +29,11 @@ type symsStruct struct { GCWriteBarrier [8]*obj.LSym Goschedguarded *obj.LSym Growslice *obj.LSym + GrowsliceBuf *obj.LSym + MoveSlice *obj.LSym + MoveSliceNoScan *obj.LSym + MoveSliceNoCap *obj.LSym + MoveSliceNoCapNoScan *obj.LSym InterfaceSwitch *obj.LSym MallocGC *obj.LSym MallocGCSmallNoScan [27]*obj.LSym diff --git a/src/cmd/compile/internal/slice/slice.go b/src/cmd/compile/internal/slice/slice.go new file mode 100644 index 00000000000..7a32e7adbd2 --- /dev/null +++ b/src/cmd/compile/internal/slice/slice.go @@ -0,0 +1,455 @@ +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package slice + +// This file implements a stack-allocation optimization +// for the backing store of slices. +// +// Consider the code: +// +// var s []int +// for i := range ... { +// s = append(s, i) +// } +// return s +// +// Some of the append operations will need to do an allocation +// by calling growslice. This will happen on the 1st, 2nd, 4th, +// 8th, etc. append calls. The allocations done by all but the +// last growslice call will then immediately be garbage. +// +// We'd like to avoid doing some of those intermediate +// allocations if possible. +// +// If we can determine that the "return s" statement is the +// *only* way that the backing store for s escapes, then we +// can rewrite the code to something like: +// +// var s []int +// for i := range N { +// s = append(s, i) +// } +// s = move2heap(s) +// return s +// +// Using the move2heap runtime function, which does: +// +// move2heap(s): +// If s is not backed by a stackframe-allocated +// backing store, return s. Otherwise, copy s +// to the heap and return the copy. +// +// Now we can treat the backing store of s allocated at the +// append site as not escaping. Previous stack allocation +// optimizations now apply, which can use a fixed-size +// stack-allocated backing store for s when appending. +// (See ../ssagen/ssa.go:(*state).append) +// +// It is tricky to do this optimization safely. To describe +// our analysis, we first define what an "exclusive" slice +// variable is. +// +// A slice variable (a variable of slice type) is called +// "exclusive" if, when it has a reference to a +// stackframe-allocated backing store, it is the only +// variable with such a reference. +// +// In other words, a slice variable is exclusive if +// any of the following holds: +// 1) It points to a heap-allocated backing store +// 2) It points to a stack-allocated backing store +// for any parent frame. +// 3) It is the only variable that references its +// backing store. +// 4) It is nil. +// +// The nice thing about exclusive slice variables is that +// it is always safe to do +// s = move2heap(s) +// whenever s is an exclusive slice variable. 
Because no +// one else has a reference to the backing store, no one +// else can tell that we moved the backing store from one +// location to another. +// +// Note that exclusiveness is a dynamic property. A slice +// variable may be exclusive during some parts of execution +// and not exclusive during others. +// +// The following operations set or preserve the exclusivity +// of a slice variable s: +// s = nil +// s = append(s, ...) +// s = s[i:j] +// ... = s[i] +// s[i] = ... +// f(s) where f does not escape its argument +// Other operations destroy exclusivity. A non-exhaustive list includes: +// x = s +// *p = s +// f(s) where f escapes its argument +// return s +// To err on the safe side, we whitelist exclusivity-preserving +// operations and we assume that any other operations that mention s +// destroy its exclusivity. +// +// Our strategy is to move the backing store of s to the heap before +// any exclusive->nonexclusive transition. That way, s will only ever +// have a reference to a stack backing store while it is exclusive. +// +// move2heap for a variable s is implemented with: +// if s points to within the stack frame { +// s2 := make([]T, s.len, s.cap) +// copy(s2[:s.cap], s[:s.cap]) +// s = s2 +// } +// Note that in general we need to copy all of s[:cap(s)] elements when +// moving to the heap. As an optimization, we keep track of slice variables +// whose capacity, and the elements in s[len(s):cap(s)], are never accessed. +// For those slice variables, we can allocate to the next size class above +// the length, which saves memory and copying cost. + +import ( + "cmd/compile/internal/base" + "cmd/compile/internal/escape" + "cmd/compile/internal/ir" + "cmd/compile/internal/reflectdata" +) + +func Funcs(all []*ir.Func) { + if base.Flag.N != 0 { + return + } + for _, fn := range all { + analyze(fn) + } +} + +func analyze(fn *ir.Func) { + type sliceInfo struct { + // Slice variable. + s *ir.Name + + // Count of uses that this pass understands. + okUses int32 + // Count of all uses found. + allUses int32 + + // A place where the slice variable transitions from + // exclusive to nonexclusive. + // We could keep track of more than one, but one is enough for now. + // Currently, this can be either a return statement or + // an assignment. + // TODO: other possible transitions? + transition ir.Stmt + + // Each s = append(s, ...) instance we found. + appends []*ir.CallExpr + + // Weight of the number of s = append(s, ...) instances we found. + // The optimizations we do are only really useful if the weight is at + // least 2. (Note: appends in loops have weight >= 2.) + appendWeight int + + // Whether we ever do cap(s), or other operations that use cap(s) + // (possibly implicitly), like s[i:j]. + capUsed bool + } + + // Every variable (*ir.Name) that we are tracking will have + // a non-nil *sliceInfo in its Opt field. + haveLocalSlice := false + maxStackSize := int64(base.Debug.VariableMakeThreshold) + var namedRets []*ir.Name + for _, s := range fn.Dcl { + if !s.Type().IsSlice() { + continue + } + if s.Type().Elem().Size() > maxStackSize { + continue + } + if !base.VariableMakeHash.MatchPos(s.Pos(), nil) { + continue + } + s.Opt = &sliceInfo{s: s} // start tracking s + haveLocalSlice = true + if s.Class == ir.PPARAMOUT { + namedRets = append(namedRets, s) + } + } + if !haveLocalSlice { + return + } + + // Keep track of loop depth while walking.
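+ // Appends inside a loop get extra weight (see appendWeight), so a single
+ // textual append in a loop still reaches weight 2 and can qualify.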
+ loopDepth := 0 + + // tracking returns the info for the slice variable if n is a slice + // variable that we're still considering, or nil otherwise. + tracking := func(n ir.Node) *sliceInfo { + if n == nil || n.Op() != ir.ONAME { + return nil + } + s := n.(*ir.Name) + if s.Opt == nil { + return nil + } + return s.Opt.(*sliceInfo) + } + + // addTransition(n, loc) records that s experiences an exclusive->nonexclusive + // transition somewhere within loc. + addTransition := func(i *sliceInfo, loc ir.Stmt) { + if i.transition != nil { + // We only keep track of a single exclusive->nonexclusive transition + // for a slice variable. If we find more than one, give up. + // (More than one transition location would be fine, but we would + // start to get worried about introducing too much additional code.) + i.s.Opt = nil + return + } + i.transition = loc + } + + // Examine an x = y assignment that occurs somewhere within statement stmt. + assign := func(x, y ir.Node, stmt ir.Stmt) { + if i := tracking(x); i != nil { + // s = y. Check for understood patterns for y. + if y == nil || y.Op() == ir.ONIL { + // s = nil is ok. + i.okUses++ + } else if y.Op() == ir.OSLICELIT { + // s = []{...} is ok. + // Note: this reveals capacity. Should it? + i.okUses++ + i.capUsed = true + } else if y.Op() == ir.OSLICE { + y := y.(*ir.SliceExpr) + if y.X == i.s { + // s = s[...:...] is ok + i.okUses += 2 + i.capUsed = true + } + } else if y.Op() == ir.OAPPEND { + y := y.(*ir.CallExpr) + if y.Args[0] == i.s { + // s = append(s, ...) is ok + i.okUses += 2 + i.appends = append(i.appends, y) + i.appendWeight += 1 + loopDepth + } + // TODO: s = append(nil, ...)? + } + // Note that technically s = make([]T, ...) preserves exclusivity, but + // we don't track that because we assume users who wrote that know + // better than the compiler does. + + // TODO: figure out how to handle s = fn(..., s, ...) + // It would be nice to maintain exclusivity of s in this situation. + // But unfortunately, fn can return one of its other arguments, which + // may be a slice with a stack-allocated backing store other than s. + // (which may have preexisting references to its backing store). + // + // Maybe we could do it if s is the only argument? + } + + if i := tracking(y); i != nil { + // ... = s + // Treat this as an exclusive->nonexclusive transition. + i.okUses++ + addTransition(i, stmt) + } + } + + var do func(ir.Node) bool + do = func(n ir.Node) bool { + if n == nil { + return false + } + switch n.Op() { + case ir.ONAME: + if i := tracking(n); i != nil { + // A use of a slice variable. Count it. + i.allUses++ + } + case ir.ODCL: + n := n.(*ir.Decl) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OINDEX: + n := n.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // s[i] is ok. + i.okUses++ + } + case ir.OLEN: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // len(s) is ok + i.okUses++ + } + case ir.OCAP: + n := n.(*ir.UnaryExpr) + if i := tracking(n.X); i != nil { + // cap(s) is ok + i.okUses++ + i.capUsed = true + } + case ir.OADDR: + n := n.(*ir.AddrExpr) + if n.X.Op() == ir.OINDEX { + n := n.X.(*ir.IndexExpr) + if i := tracking(n.X); i != nil { + // &s[i] is definitely a nonexclusive transition. + // (We need this case because s[i] is ok, but &s[i] is not.) 
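+ // Once &s[i] exists, s is no longer the only reference to its backing
+ // store, and that pointer would still refer to the old stack copy if we
+ // later moved s, so stop tracking s entirely.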
+ i.s.Opt = nil + } + } + case ir.ORETURN: + n := n.(*ir.ReturnStmt) + for _, x := range n.Results { + if i := tracking(x); i != nil { + i.okUses++ + // We go exclusive->nonexclusive here + addTransition(i, n) + } + } + if len(n.Results) == 0 { + // Uses of named result variables are implicit here. + for _, x := range namedRets { + if i := tracking(x); i != nil { + addTransition(i, n) + } + } + } + case ir.OCALLFUNC: + n := n.(*ir.CallExpr) + for idx, arg := range n.Args { + if i := tracking(arg); i != nil { + if !argLeak(n, idx) { + // Passing s to a nonescaping arg is ok. + i.okUses++ + i.capUsed = true + } + } + } + case ir.ORANGE: + // Range over slice is ok. + n := n.(*ir.RangeStmt) + if i := tracking(n.X); i != nil { + i.okUses++ + } + case ir.OAS: + n := n.(*ir.AssignStmt) + assign(n.X, n.Y, n) + case ir.OAS2: + n := n.(*ir.AssignListStmt) + for i := range len(n.Lhs) { + assign(n.Lhs[i], n.Rhs[i], n) + } + case ir.OCLOSURE: + n := n.(*ir.ClosureExpr) + for _, v := range n.Func.ClosureVars { + do(v.Outer) + } + } + if n.Op() == ir.OFOR || n.Op() == ir.ORANGE { + // Note: loopDepth isn't really right for init portion + // of the for statement, but that's ok. Correctness + // does not depend on depth info. + loopDepth++ + defer func() { loopDepth-- }() + } + // Check all the children. + ir.DoChildren(n, do) + return false + } + + // Run the analysis over the whole body. + for _, stmt := range fn.Body { + do(stmt) + } + + // Process accumulated info to find slice variables + // that we can allocate on the stack. + for _, s := range fn.Dcl { + if s.Opt == nil { + continue + } + i := s.Opt.(*sliceInfo) + s.Opt = nil + if i.okUses != i.allUses { + // Some use of i.s that we don't understand lurks. Give up. + continue + } + + // At this point, we've decided that we *can* do + // the optimization. + + if i.transition == nil { + // Exclusive for its whole lifetime. That means it + // didn't escape. We can already handle nonescaping + // slices without this pass. + continue + } + if i.appendWeight < 2 { + // This optimization only really helps if there is + // (dynamically) more than one append. + continue + } + + // Commit point - at this point we've decided we *should* + // do the optimization. + + // Insert a move2heap operation before the exclusive->nonexclusive + // transition. + move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s) + if i.capUsed { + move.PreserveCapacity = true + } + move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0]) + move.SetType(i.s.Type()) + move.SetTypecheck(1) + as := ir.NewAssignStmt(i.transition.Pos(), i.s, move) + as.SetTypecheck(1) + i.transition.PtrInit().Prepend(as) + // Note: we prepend because we need to put the move2heap + // operation first, before any other init work, as the transition + // might occur in the init work. + + // Now that we've inserted a move2heap operation before every + // exclusive -> nonexclusive transition, appends can now use + // stack backing stores. + // (This is the whole point of this pass, to enable stack + // allocation of append backing stores.) + for _, a := range i.appends { + a.SetEsc(ir.EscNone) + if i.capUsed { + a.UseBuf = true + } + } + } +} + +// argLeak reports whether the idx'th argument to the call n escapes anywhere +// (to the heap, another argument, return value, etc.). +// If unknown, it reports true.
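+// For example, for a call f(p, q) to some function f (hypothetical names) whose
+// escape analysis summary says p does not escape but q leaks to the heap,
+// argLeak reports false for index 0 and true for index 1.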
+func argLeak(n *ir.CallExpr, idx int) bool { + if n.Op() != ir.OCALLFUNC { + return true + } + fn := ir.StaticCalleeName(ir.StaticValue(n.Fun)) + if fn == nil { + return true + } + fntype := fn.Type() + if recv := fntype.Recv(); recv != nil { + if idx == 0 { + return escape.ParseLeaks(recv.Note).Any() + } + idx-- + } + return escape.ParseLeaks(fntype.Params()[idx].Note).Any() +} diff --git a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go index e42b54398db..b23ccc4a70a 100644 --- a/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/AMD64Ops.go @@ -118,6 +118,7 @@ func init() { gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly} gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly} gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly} + gp21sp2 = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly} gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly} gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}} gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}} @@ -262,7 +263,7 @@ func init() { {name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, {name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, - {name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, + {name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true}, {name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true}, {name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true}, {name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true}, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 944e1d78548..f13373d2c05 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -7643,7 +7643,7 @@ var opcodeTable = [...]opInfo{ reg: regInfo{ inputs: []inputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 - {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15 }, outputs: []outputInfo{ {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go index 3dea733bbdb..96be8ddd864 100644 --- a/src/cmd/compile/internal/ssagen/ssa.go +++ b/src/cmd/compile/internal/ssagen/ssa.go @@ -124,6 +124,11 @@ func InitConfig() { ir.Syms.GCWriteBarrier[7] = typecheck.LookupRuntimeFunc("gcWriteBarrier8") ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded") ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice") + ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf") + ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice") + ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan") + ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap") + ir.Syms.MoveSliceNoCapNoScan = typecheck.LookupRuntimeFunc("moveSliceNoCapNoScan") ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch") for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ { 
ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i)) @@ -1091,6 +1096,23 @@ type state struct { // Block starting position, indexed by block id. blockStarts []src.XPos + + // Information for stack allocation. Indexed by the first argument + // to an append call. Normally a slice-typed variable, but not always. + backingStores map[ir.Node]*backingStoreInfo +} + +type backingStoreInfo struct { + // Size of backing store array (in elements) + K int64 + // Stack-allocated backing store variable. + store *ir.Name + // Dynamic boolean variable marking the fact that we used this backing store. + used *ir.Name + // Have we used this variable statically yet? This is just a hint + // to avoid checking the dynamic variable if the answer is obvious. + // (usedStatic == true implies used == true) + usedStatic bool } type funcLine struct { @@ -3673,6 +3695,9 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value { case ir.OAPPEND: return s.append(n.(*ir.CallExpr), false) + case ir.OMOVE2HEAP: + return s.move2heap(n.(*ir.MoveToHeapExpr)) + case ir.OMIN, ir.OMAX: return s.minMax(n.(*ir.CallExpr)) @@ -3734,6 +3759,68 @@ func (s *state) resultAddrOfCall(c *ssa.Value, which int64, t *types.Type) *ssa. return addr } +// Get backing store information for an append call. +func (s *state) getBackingStoreInfoForAppend(n *ir.CallExpr) *backingStoreInfo { + if n.Esc() != ir.EscNone { + return nil + } + return s.getBackingStoreInfo(n.Args[0]) +} +func (s *state) getBackingStoreInfo(n ir.Node) *backingStoreInfo { + t := n.Type() + et := t.Elem() + maxStackSize := int64(base.Debug.VariableMakeThreshold) + if et.Size() == 0 || et.Size() > maxStackSize { + return nil + } + if base.Flag.N != 0 { + return nil + } + if !base.VariableMakeHash.MatchPos(n.Pos(), nil) { + return nil + } + i := s.backingStores[n] + if i != nil { + return i + } + + // Build type of backing store. + K := maxStackSize / et.Size() // rounds down + KT := types.NewArray(et, K) + KT.SetNoalg(true) + types.CalcArraySize(KT) + // Align more than naturally for the type KT. See issue 73199. + align := types.NewArray(types.Types[types.TUINTPTR], 0) + types.CalcArraySize(align) + storeTyp := types.NewStruct([]*types.Field{ + {Sym: types.BlankSym, Type: align}, + {Sym: types.BlankSym, Type: KT}, + }) + storeTyp.SetNoalg(true) + types.CalcStructSize(storeTyp) + + // Make backing store variable. + backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) + backingStore.SetAddrtaken(true) + + // Make "used" boolean. + used := typecheck.TempAt(n.Pos(), s.curfn, types.Types[types.TBOOL]) + if s.curBlock == s.f.Entry { + s.vars[used] = s.constBool(false) + } else { + // initialize this variable at end of entry block + s.defvars[s.f.Entry.ID][used] = s.constBool(false) + } + + // Initialize an info structure. + if s.backingStores == nil { + s.backingStores = map[ir.Node]*backingStoreInfo{} + } + i = &backingStoreInfo{K: K, store: backingStore, used: used, usedStatic: false} + s.backingStores[n] = i + return i +} + // append converts an OAPPEND node to SSA. // If inplace is false, it converts the OAPPEND expression n to an ssa.Value, // adds it to s, and returns the Value. @@ -3824,9 +3911,29 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // A stack-allocated backing store could be used at every // append that qualifies, but we limit it in some cases to // avoid wasted code and stack space. - // TODO: handle ... append case. 
- maxStackSize := int64(base.Debug.VariableMakeThreshold) - if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] { + // + // Note that we have two different strategies. + // 1. The standard strategy is just to allocate the full + // backing store at the first append. + // 2. An alternate strategy is used when + // a. The backing store eventually escapes via move2heap + // and b. The capacity is used somehow + // In this case, we don't want to just allocate + // the full buffer at the first append, because when + // we move2heap the buffer to the heap when it escapes, + // we might end up wasting memory because we can't + // change the capacity. + // So in this case we use growsliceBuf to reuse the buffer + // and walk one step up the size class ladder each time. + // + // TODO: handle ... append case? Currently we handle only + // a fixed number of appended elements. + var info *backingStoreInfo + if !inplace { + info = s.getBackingStoreInfoForAppend(n) + } + + if !inplace && info != nil && !n.UseBuf && !info.usedStatic { // if l <= K { // if !used { // if oldLen == 0 { @@ -3850,43 +3957,19 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // It is ok to do it more often, but it is probably helpful only for // the first instance. TODO: this could use more tuning. Using ir.Node // as the key works for *ir.Name instances but probably nothing else. - if s.appendTargets == nil { - s.appendTargets = map[ir.Node]bool{} - } - s.appendTargets[sn] = true - - K := maxStackSize / et.Size() // rounds down - KT := types.NewArray(et, K) - KT.SetNoalg(true) - types.CalcArraySize(KT) - // Align more than naturally for the type KT. See issue 73199. - align := types.NewArray(types.Types[types.TUINTPTR], 0) - types.CalcArraySize(align) - storeTyp := types.NewStruct([]*types.Field{ - {Sym: types.BlankSym, Type: align}, - {Sym: types.BlankSym, Type: KT}, - }) - storeTyp.SetNoalg(true) - types.CalcStructSize(storeTyp) + info.usedStatic = true + // TODO: unset usedStatic somehow? usedTestBlock := s.f.NewBlock(ssa.BlockPlain) oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain) bodyBlock := s.f.NewBlock(ssa.BlockPlain) growSlice := s.f.NewBlock(ssa.BlockPlain) - - // Make "used" boolean. - tBool := types.Types[types.TBOOL] - used := typecheck.TempAt(n.Pos(), s.curfn, tBool) - s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry - - // Make backing store variable. 
tInt := types.Types[types.TINT] - backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp) - backingStore.SetAddrtaken(true) + tBool := types.Types[types.TBOOL] // if l <= K s.startBlock(grow) - kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K)) + kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, info.K)) b := s.endBlock() b.Kind = ssa.BlockIf b.SetControl(kTest) @@ -3896,7 +3979,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // if !used s.startBlock(usedTestBlock) - usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used)) + usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(info.used)) b = s.endBlock() b.Kind = ssa.BlockIf b.SetControl(usedTest) @@ -3917,18 +4000,18 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // var store struct { _ [0]uintptr; arr [K]T } s.startBlock(bodyBlock) if et.HasPointers() { - s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem()) + s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, info.store, s.mem()) } - addr := s.addr(backingStore) - s.zero(storeTyp, addr) + addr := s.addr(info.store) + s.zero(info.store.Type(), addr) // s = store.arr[:l:K] s.vars[ptrVar] = addr s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test. - s.vars[capVar] = s.constInt(tInt, K) + s.vars[capVar] = s.constInt(tInt, info.K) // used = true - s.assign(used, s.constBool(true), false, 0) + s.assign(info.used, s.constBool(true), false, 0) b = s.endBlock() b.AddEdgeTo(assign) @@ -3939,7 +4022,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { // Call growslice s.startBlock(grow) taddr := s.expr(n.Fun) - r := s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + var r []*ssa.Value + if info != nil && n.UseBuf { + // Use stack-allocated buffer as backing store, if we can. + if et.HasPointers() && !info.usedStatic { + // Initialize in the function header. Not the best place, + // but it makes sure we don't scan this area before it is + // initialized. + mem := s.defvars[s.f.Entry.ID][memVar] + mem = s.f.Entry.NewValue1A(n.Pos(), ssa.OpVarDef, types.TypeMem, info.store, mem) + addr := s.f.Entry.NewValue2A(n.Pos(), ssa.OpLocalAddr, types.NewPtr(info.store.Type()), info.store, s.sp, mem) + mem = s.f.Entry.NewValue2I(n.Pos(), ssa.OpZero, types.TypeMem, info.store.Type().Size(), addr, mem) + mem.Aux = info.store.Type() + s.defvars[s.f.Entry.ID][memVar] = mem + info.usedStatic = true + } + r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K)) + } else { + r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr) + } // Decompose output slice p = s.newValue1(ssa.OpSlicePtr, pt, r[0]) @@ -4026,6 +4127,95 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value { return s.newValue3(ssa.OpSliceMake, n.Type(), p, l, c) } +func (s *state) move2heap(n *ir.MoveToHeapExpr) *ssa.Value { + // s := n.Slice + // if s.ptr points to current stack frame { + // s2 := make([]T, s.len, s.cap) + // copy(s2[:cap], s[:cap]) + // s = s2 + // } + // return s + + slice := s.expr(n.Slice) + et := slice.Type.Elem() + pt := types.NewPtr(et) + + info := s.getBackingStoreInfo(n) + if info == nil { + // Backing store will never be stack allocated, so + // move2heap is a no-op. + return slice + } + + // Decompose input slice.
+ p := s.newValue1(ssa.OpSlicePtr, pt, slice) + l := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice) + c := s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], slice) + + moveBlock := s.f.NewBlock(ssa.BlockPlain) + mergeBlock := s.f.NewBlock(ssa.BlockPlain) + + s.vars[ptrVar] = p + s.vars[lenVar] = l + s.vars[capVar] = c + + // Decide if we need to move the slice backing store. + // It needs to be moved if it is currently on the stack. + sub := ssa.OpSub64 + less := ssa.OpLess64U + if s.config.PtrSize == 4 { + sub = ssa.OpSub32 + less = ssa.OpLess32U + } + callerSP := s.newValue1(ssa.OpGetCallerSP, types.Types[types.TUINTPTR], s.mem()) + frameSize := s.newValue2(sub, types.Types[types.TUINTPTR], callerSP, s.sp) + pInt := s.newValue2(ssa.OpConvert, types.Types[types.TUINTPTR], p, s.mem()) + off := s.newValue2(sub, types.Types[types.TUINTPTR], pInt, s.sp) + cond := s.newValue2(less, types.Types[types.TBOOL], off, frameSize) + + b := s.endBlock() + b.Kind = ssa.BlockIf + b.Likely = ssa.BranchUnlikely // fast path is to not have to call into runtime + b.SetControl(cond) + b.AddEdgeTo(moveBlock) + b.AddEdgeTo(mergeBlock) + + // Move the slice to heap + s.startBlock(moveBlock) + var newSlice *ssa.Value + if et.HasPointers() { + typ := s.expr(n.RType) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSlice, true, []*types.Type{slice.Type}, typ, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCap, true, []*types.Type{slice.Type}, typ, p, l)[0] + } + } else { + elemSize := s.constInt(types.Types[types.TUINTPTR], et.Size()) + if n.PreserveCapacity { + newSlice = s.rtcall(ir.Syms.MoveSliceNoScan, true, []*types.Type{slice.Type}, elemSize, p, l, c)[0] + } else { + newSlice = s.rtcall(ir.Syms.MoveSliceNoCapNoScan, true, []*types.Type{slice.Type}, elemSize, p, l)[0] + } + } + // Decompose output slice + s.vars[ptrVar] = s.newValue1(ssa.OpSlicePtr, pt, newSlice) + s.vars[lenVar] = s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], newSlice) + s.vars[capVar] = s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], newSlice) + b = s.endBlock() + b.AddEdgeTo(mergeBlock) + + // Merge fast path (no moving) and slow path (moved) + s.startBlock(mergeBlock) + p = s.variable(ptrVar, pt) // generates phi for ptr + l = s.variable(lenVar, types.Types[types.TINT]) // generates phi for len + c = s.variable(capVar, types.Types[types.TINT]) // generates phi for cap + delete(s.vars, ptrVar) + delete(s.vars, lenVar) + delete(s.vars, capVar) + return s.newValue3(ssa.OpSliceMake, slice.Type, p, l, c) +} + // minMax converts an OMIN/OMAX builtin call into SSA. 
func (s *state) minMax(n *ir.CallExpr) *ssa.Value { // The OMIN/OMAX builtin is variadic, but its semantics are diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go index fbe8f77abd4..7988ebf5b93 100644 --- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go +++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go @@ -195,6 +195,7 @@ func makeslice(typ *byte, len int, cap int) unsafe.Pointer func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any) +func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any) func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64) func panicunsafeslicelen() func panicunsafeslicenilptr() @@ -202,6 +203,11 @@ func unsafestringcheckptr(ptr unsafe.Pointer, len int64) func panicunsafestringlen() func panicunsafestringnilptr() +func moveSlice(typ *byte, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoScan(elemSize uintptr, old *byte, len, cap int) (*byte, int, int) +func moveSliceNoCap(typ *byte, old *byte, len int) (*byte, int, int) +func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int) + func memmove(to *any, frm *any, length uintptr) func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr) func memclrHasPointers(ptr unsafe.Pointer, n uintptr) diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go index ff72bdcf373..ee892856dd9 100644 --- a/src/cmd/compile/internal/typecheck/builtin.go +++ b/src/cmd/compile/internal/typecheck/builtin.go @@ -160,80 +160,85 @@ var runtimeDecls = [...]struct { {"makeslice64", funcTag, 124}, {"makeslicecopy", funcTag, 125}, {"growslice", funcTag, 127}, - {"unsafeslicecheckptr", funcTag, 128}, + {"growsliceBuf", funcTag, 128}, + {"unsafeslicecheckptr", funcTag, 129}, {"panicunsafeslicelen", funcTag, 9}, {"panicunsafeslicenilptr", funcTag, 9}, - {"unsafestringcheckptr", funcTag, 129}, + {"unsafestringcheckptr", funcTag, 130}, {"panicunsafestringlen", funcTag, 9}, {"panicunsafestringnilptr", funcTag, 9}, - {"memmove", funcTag, 130}, - {"memclrNoHeapPointers", funcTag, 131}, - {"memclrHasPointers", funcTag, 131}, - {"memequal", funcTag, 132}, - {"memequal0", funcTag, 133}, - {"memequal8", funcTag, 133}, - {"memequal16", funcTag, 133}, - {"memequal32", funcTag, 133}, - {"memequal64", funcTag, 133}, - {"memequal128", funcTag, 133}, - {"f32equal", funcTag, 134}, - {"f64equal", funcTag, 134}, - {"c64equal", funcTag, 134}, - {"c128equal", funcTag, 134}, - {"strequal", funcTag, 134}, - {"interequal", funcTag, 134}, - {"nilinterequal", funcTag, 134}, - {"memhash", funcTag, 135}, - {"memhash0", funcTag, 136}, - {"memhash8", funcTag, 136}, - {"memhash16", funcTag, 136}, - {"memhash32", funcTag, 136}, - {"memhash64", funcTag, 136}, - {"memhash128", funcTag, 136}, - {"f32hash", funcTag, 137}, - {"f64hash", funcTag, 137}, - {"c64hash", funcTag, 137}, - {"c128hash", funcTag, 137}, - {"strhash", funcTag, 137}, - {"interhash", funcTag, 137}, - {"nilinterhash", funcTag, 137}, - {"int64div", funcTag, 138}, - {"uint64div", funcTag, 139}, - {"int64mod", funcTag, 138}, - {"uint64mod", funcTag, 139}, - {"float64toint64", funcTag, 140}, - {"float64touint64", funcTag, 141}, - {"float64touint32", funcTag, 142}, - {"int64tofloat64", funcTag, 143}, - {"int64tofloat32", 
funcTag, 144}, - {"uint64tofloat64", funcTag, 145}, - {"uint64tofloat32", funcTag, 146}, - {"uint32tofloat64", funcTag, 147}, - {"complex128div", funcTag, 148}, + {"moveSlice", funcTag, 131}, + {"moveSliceNoScan", funcTag, 132}, + {"moveSliceNoCap", funcTag, 133}, + {"moveSliceNoCapNoScan", funcTag, 134}, + {"memmove", funcTag, 135}, + {"memclrNoHeapPointers", funcTag, 136}, + {"memclrHasPointers", funcTag, 136}, + {"memequal", funcTag, 137}, + {"memequal0", funcTag, 138}, + {"memequal8", funcTag, 138}, + {"memequal16", funcTag, 138}, + {"memequal32", funcTag, 138}, + {"memequal64", funcTag, 138}, + {"memequal128", funcTag, 138}, + {"f32equal", funcTag, 139}, + {"f64equal", funcTag, 139}, + {"c64equal", funcTag, 139}, + {"c128equal", funcTag, 139}, + {"strequal", funcTag, 139}, + {"interequal", funcTag, 139}, + {"nilinterequal", funcTag, 139}, + {"memhash", funcTag, 140}, + {"memhash0", funcTag, 141}, + {"memhash8", funcTag, 141}, + {"memhash16", funcTag, 141}, + {"memhash32", funcTag, 141}, + {"memhash64", funcTag, 141}, + {"memhash128", funcTag, 141}, + {"f32hash", funcTag, 142}, + {"f64hash", funcTag, 142}, + {"c64hash", funcTag, 142}, + {"c128hash", funcTag, 142}, + {"strhash", funcTag, 142}, + {"interhash", funcTag, 142}, + {"nilinterhash", funcTag, 142}, + {"int64div", funcTag, 143}, + {"uint64div", funcTag, 144}, + {"int64mod", funcTag, 143}, + {"uint64mod", funcTag, 144}, + {"float64toint64", funcTag, 145}, + {"float64touint64", funcTag, 146}, + {"float64touint32", funcTag, 147}, + {"int64tofloat64", funcTag, 148}, + {"int64tofloat32", funcTag, 149}, + {"uint64tofloat64", funcTag, 150}, + {"uint64tofloat32", funcTag, 151}, + {"uint32tofloat64", funcTag, 152}, + {"complex128div", funcTag, 153}, {"racefuncenter", funcTag, 33}, {"racefuncexit", funcTag, 9}, {"raceread", funcTag, 33}, {"racewrite", funcTag, 33}, - {"racereadrange", funcTag, 149}, - {"racewriterange", funcTag, 149}, - {"msanread", funcTag, 149}, - {"msanwrite", funcTag, 149}, - {"msanmove", funcTag, 150}, - {"asanread", funcTag, 149}, - {"asanwrite", funcTag, 149}, - {"checkptrAlignment", funcTag, 151}, - {"checkptrArithmetic", funcTag, 153}, - {"libfuzzerTraceCmp1", funcTag, 154}, - {"libfuzzerTraceCmp2", funcTag, 155}, - {"libfuzzerTraceCmp4", funcTag, 156}, - {"libfuzzerTraceCmp8", funcTag, 157}, - {"libfuzzerTraceConstCmp1", funcTag, 154}, - {"libfuzzerTraceConstCmp2", funcTag, 155}, - {"libfuzzerTraceConstCmp4", funcTag, 156}, - {"libfuzzerTraceConstCmp8", funcTag, 157}, - {"libfuzzerHookStrCmp", funcTag, 158}, - {"libfuzzerHookEqualFold", funcTag, 158}, - {"addCovMeta", funcTag, 160}, + {"racereadrange", funcTag, 154}, + {"racewriterange", funcTag, 154}, + {"msanread", funcTag, 154}, + {"msanwrite", funcTag, 154}, + {"msanmove", funcTag, 155}, + {"asanread", funcTag, 154}, + {"asanwrite", funcTag, 154}, + {"checkptrAlignment", funcTag, 156}, + {"checkptrArithmetic", funcTag, 158}, + {"libfuzzerTraceCmp1", funcTag, 159}, + {"libfuzzerTraceCmp2", funcTag, 160}, + {"libfuzzerTraceCmp4", funcTag, 161}, + {"libfuzzerTraceCmp8", funcTag, 162}, + {"libfuzzerTraceConstCmp1", funcTag, 159}, + {"libfuzzerTraceConstCmp2", funcTag, 160}, + {"libfuzzerTraceConstCmp4", funcTag, 161}, + {"libfuzzerTraceConstCmp8", funcTag, 162}, + {"libfuzzerHookStrCmp", funcTag, 163}, + {"libfuzzerHookEqualFold", funcTag, 163}, + {"addCovMeta", funcTag, 165}, {"x86HasPOPCNT", varTag, 6}, {"x86HasSSE41", varTag, 6}, {"x86HasFMA", varTag, 6}, @@ -243,11 +248,11 @@ var runtimeDecls = [...]struct { {"loong64HasLAM_BH", varTag, 6}, {"loong64HasLSX", 
varTag, 6}, {"riscv64HasZbb", varTag, 6}, - {"asanregisterglobals", funcTag, 131}, + {"asanregisterglobals", funcTag, 136}, } func runtimeTypes() []*types.Type { - var typs [161]*types.Type + var typs [166]*types.Type typs[0] = types.ByteType typs[1] = types.NewPtr(typs[0]) typs[2] = types.Types[types.TANY] @@ -376,39 +381,44 @@ func runtimeTypes() []*types.Type { typs[125] = newSig(params(typs[1], typs[13], typs[13], typs[7]), params(typs[7])) typs[126] = types.NewSlice(typs[2]) typs[127] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1]), params(typs[126])) - typs[128] = newSig(params(typs[1], typs[7], typs[22]), nil) - typs[129] = newSig(params(typs[7], typs[22]), nil) - typs[130] = newSig(params(typs[3], typs[3], typs[5]), nil) - typs[131] = newSig(params(typs[7], typs[5]), nil) - typs[132] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) - typs[133] = newSig(params(typs[3], typs[3]), params(typs[6])) - typs[134] = newSig(params(typs[7], typs[7]), params(typs[6])) - typs[135] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) - typs[136] = newSig(params(typs[7], typs[5]), params(typs[5])) - typs[137] = newSig(params(typs[3], typs[5]), params(typs[5])) - typs[138] = newSig(params(typs[22], typs[22]), params(typs[22])) - typs[139] = newSig(params(typs[24], typs[24]), params(typs[24])) - typs[140] = newSig(params(typs[18]), params(typs[22])) - typs[141] = newSig(params(typs[18]), params(typs[24])) - typs[142] = newSig(params(typs[18]), params(typs[67])) - typs[143] = newSig(params(typs[22]), params(typs[18])) - typs[144] = newSig(params(typs[22]), params(typs[20])) - typs[145] = newSig(params(typs[24]), params(typs[18])) - typs[146] = newSig(params(typs[24]), params(typs[20])) - typs[147] = newSig(params(typs[67]), params(typs[18])) - typs[148] = newSig(params(typs[26], typs[26]), params(typs[26])) - typs[149] = newSig(params(typs[5], typs[5]), nil) - typs[150] = newSig(params(typs[5], typs[5], typs[5]), nil) - typs[151] = newSig(params(typs[7], typs[1], typs[5]), nil) - typs[152] = types.NewSlice(typs[7]) - typs[153] = newSig(params(typs[7], typs[152]), nil) - typs[154] = newSig(params(typs[71], typs[71], typs[15]), nil) - typs[155] = newSig(params(typs[65], typs[65], typs[15]), nil) - typs[156] = newSig(params(typs[67], typs[67], typs[15]), nil) - typs[157] = newSig(params(typs[24], typs[24], typs[15]), nil) - typs[158] = newSig(params(typs[30], typs[30], typs[15]), nil) - typs[159] = types.NewArray(typs[0], 16) - typs[160] = newSig(params(typs[7], typs[67], typs[159], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) + typs[128] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1], typs[3], typs[13]), params(typs[126])) + typs[129] = newSig(params(typs[1], typs[7], typs[22]), nil) + typs[130] = newSig(params(typs[7], typs[22]), nil) + typs[131] = newSig(params(typs[1], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[132] = newSig(params(typs[5], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13])) + typs[133] = newSig(params(typs[1], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[134] = newSig(params(typs[5], typs[1], typs[13]), params(typs[1], typs[13], typs[13])) + typs[135] = newSig(params(typs[3], typs[3], typs[5]), nil) + typs[136] = newSig(params(typs[7], typs[5]), nil) + typs[137] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6])) + typs[138] = newSig(params(typs[3], typs[3]), params(typs[6])) + typs[139] = newSig(params(typs[7], typs[7]), params(typs[6])) + 
typs[140] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5])) + typs[141] = newSig(params(typs[7], typs[5]), params(typs[5])) + typs[142] = newSig(params(typs[3], typs[5]), params(typs[5])) + typs[143] = newSig(params(typs[22], typs[22]), params(typs[22])) + typs[144] = newSig(params(typs[24], typs[24]), params(typs[24])) + typs[145] = newSig(params(typs[18]), params(typs[22])) + typs[146] = newSig(params(typs[18]), params(typs[24])) + typs[147] = newSig(params(typs[18]), params(typs[67])) + typs[148] = newSig(params(typs[22]), params(typs[18])) + typs[149] = newSig(params(typs[22]), params(typs[20])) + typs[150] = newSig(params(typs[24]), params(typs[18])) + typs[151] = newSig(params(typs[24]), params(typs[20])) + typs[152] = newSig(params(typs[67]), params(typs[18])) + typs[153] = newSig(params(typs[26], typs[26]), params(typs[26])) + typs[154] = newSig(params(typs[5], typs[5]), nil) + typs[155] = newSig(params(typs[5], typs[5], typs[5]), nil) + typs[156] = newSig(params(typs[7], typs[1], typs[5]), nil) + typs[157] = types.NewSlice(typs[7]) + typs[158] = newSig(params(typs[7], typs[157]), nil) + typs[159] = newSig(params(typs[71], typs[71], typs[15]), nil) + typs[160] = newSig(params(typs[65], typs[65], typs[15]), nil) + typs[161] = newSig(params(typs[67], typs[67], typs[15]), nil) + typs[162] = newSig(params(typs[24], typs[24], typs[15]), nil) + typs[163] = newSig(params(typs[30], typs[30], typs[15]), nil) + typs[164] = types.NewArray(typs[0], 16) + typs[165] = newSig(params(typs[7], typs[67], typs[164], typs[30], typs[13], typs[71], typs[71]), params(typs[67])) return typs[:] } diff --git a/src/cmd/compile/internal/walk/expr.go b/src/cmd/compile/internal/walk/expr.go index 989ae0a1db2..2794671c73b 100644 --- a/src/cmd/compile/internal/walk/expr.go +++ b/src/cmd/compile/internal/walk/expr.go @@ -351,6 +351,11 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node { case ir.OMETHVALUE: return walkMethodValue(n.(*ir.SelectorExpr), init) + + case ir.OMOVE2HEAP: + n := n.(*ir.MoveToHeapExpr) + n.Slice = walkExpr(n.Slice, init) + return n } // No return! Each case must return (or panic), diff --git a/src/runtime/slice.go b/src/runtime/slice.go index e31d5dccb24..a9e8fc16109 100644 --- a/src/runtime/slice.go +++ b/src/runtime/slice.go @@ -399,3 +399,107 @@ func bytealg_MakeNoZero(len int) []byte { cap := roundupsize(uintptr(len), true) return unsafe.Slice((*byte)(mallocgc(cap, nil, false)), cap)[:len] } + +// moveSlice copies the input slice to the heap and returns it. +// et is the element type of the slice. +func moveSlice(et *_type, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * et.Size_ + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), capmem, et) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoScan is like moveSlice except the element type is known to +// not have any pointers. We instead pass in the size of the element. +func moveSliceNoScan(elemSize uintptr, old unsafe.Pointer, len, cap int) (unsafe.Pointer, int, int) { + if cap == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + capmem := uintptr(cap) * elemSize + new := mallocgc(capmem, nil, false) + memmove(new, old, capmem) + return new, len, cap +} + +// moveSliceNoCap is like moveSlice, but can pick any appropriate capacity +// for the returned slice. 
+// Elements between len and cap in the returned slice will be zeroed. +func moveSliceNoCap(et *_type, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * et.Size_ + capmem := roundupsize(lenmem, false) + new := mallocgc(capmem, et, true) + bulkBarrierPreWriteSrcOnly(uintptr(new), uintptr(old), lenmem, et) + memmove(new, old, lenmem) + return new, len, int(capmem / et.Size_) +} + +// moveSliceNoCapNoScan is a combination of moveSliceNoScan and moveSliceNoCap. +func moveSliceNoCapNoScan(elemSize uintptr, old unsafe.Pointer, len int) (unsafe.Pointer, int, int) { + if len == 0 { + if old != nil { + old = unsafe.Pointer(&zerobase) + } + return old, 0, 0 + } + lenmem := uintptr(len) * elemSize + capmem := roundupsize(lenmem, true) + new := mallocgc(capmem, nil, false) + memmove(new, old, lenmem) + if capmem > lenmem { + memclrNoHeapPointers(add(new, lenmem), capmem-lenmem) + } + return new, len, int(capmem / elemSize) +} + +// growsliceBuf is like growslice, but we can use the given buffer +// as a backing store if we want. bufPtr must be on the stack. +func growsliceBuf(oldPtr unsafe.Pointer, newLen, oldCap, num int, et *_type, bufPtr unsafe.Pointer, bufLen int) slice { + if newLen > bufLen { + // Doesn't fit, process like a normal growslice. + return growslice(oldPtr, newLen, oldCap, num, et) + } + oldLen := newLen - num + if oldPtr != bufPtr && oldLen != 0 { + // Move data to start of buffer. + // Note: bufPtr is on the stack, so no write barrier needed. + memmove(bufPtr, oldPtr, uintptr(oldLen)*et.Size_) + } + // Pick a new capacity. + // + // Unlike growslice, we don't need to double the size each time. + // The work done here is not proportional to the length of the slice. + // (Unless the memmove happens above, but that is rare, and in any + // case there are not many elements on this path.) + // + // Instead, we try to just bump up to the next size class. + // This will ensure that we don't waste any space when we eventually + // call moveSlice with the resulting slice. + newCap := int(roundupsize(uintptr(newLen)*et.Size_, !et.Pointers()) / et.Size_) + + // Zero slice beyond newLen. + // The buffer is stack memory, so NoHeapPointers is ok. + // Caller will overwrite [oldLen:newLen], so we don't need to zero that portion. + // If et.Pointers(), buffer is at least initialized so we don't need to + // worry about the caller overwriting junk in [oldLen:newLen]. 
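+ // For example, appending the 9th byte to a []byte backed by this buffer
+ // rounds newCap up to the next size class, 16, so buf[9:16] is zeroed here.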
+ if newLen < newCap { + memclrNoHeapPointers(add(bufPtr, uintptr(newLen)*et.Size_), uintptr(newCap-newLen)*et.Size_) + } + + return slice{bufPtr, newLen, newCap} +} diff --git a/src/runtime/slice_test.go b/src/runtime/slice_test.go index cd2bc26d1eb..5463b6c02fb 100644 --- a/src/runtime/slice_test.go +++ b/src/runtime/slice_test.go @@ -6,6 +6,9 @@ package runtime_test import ( "fmt" + "internal/race" + "internal/testenv" + "runtime" "testing" ) @@ -499,3 +502,319 @@ func BenchmarkAppendInPlace(b *testing.B) { }) } + +//go:noinline +func byteSlice(n int) []byte { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r +} +func TestAppendByteInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, // If we up the stack buffer size from 32->64, this line and the next would become 48. + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r = byteSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func ptrSlice(n int, p *[]*byte) { + var r []*byte + for range n { + r = append(r, nil) + } + *p = r +} +func TestAppendPtrInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + var tests [][3]int + if runtime.PtrSize == 8 { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 1}, + {2, 1, 2}, + {3, 1, 3}, // This is the interesting case, allocates 24 bytes when before it was 32. + {4, 1, 4}, + {5, 1, 8}, + {6, 1, 8}, + {7, 1, 8}, + {8, 1, 8}, + {9, 2, 16}, + } + } else { + tests = [][3]int{ + {0, 0, 0}, + {1, 1, 2}, + {2, 1, 2}, + {3, 1, 4}, + {4, 1, 4}, + {5, 1, 6}, // These two are also 24 bytes instead of 32. 
+ {6, 1, 6}, // + {7, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {10, 1, 16}, + {11, 1, 16}, + {12, 1, 16}, + {13, 1, 16}, + {14, 1, 16}, + {15, 1, 16}, + {16, 1, 16}, + {17, 2, 32}, + } + } + for _, test := range tests { + n := test[0] + want := test[1] + wantCap := test[2] + var r []*byte + got := testing.AllocsPerRun(10, func() { + ptrSlice(n, &r) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +//go:noinline +func byteCapSlice(n int) ([]byte, int) { + var r []byte + for i := range n { + r = append(r, byte(i)) + } + return r, cap(r) +} +func TestAppendByteCapInLoop(t *testing.T) { + testenv.SkipIfOptimizationOff(t) + if race.Enabled { + t.Skip("skipping in -race mode") + } + for _, test := range [][3]int{ + {0, 0, 0}, + {1, 1, 8}, + {2, 1, 8}, + {8, 1, 8}, + {9, 1, 16}, + {16, 1, 16}, + {17, 1, 24}, + {24, 1, 24}, + {25, 1, 32}, + {32, 1, 32}, + {33, 1, 64}, + {48, 1, 64}, + {49, 1, 64}, + {64, 1, 64}, + {65, 2, 128}, + } { + n := test[0] + want := test[1] + wantCap := test[2] + var r []byte + got := testing.AllocsPerRun(10, func() { + r, _ = byteCapSlice(n) + }) + if got != float64(want) { + t.Errorf("for size %d, got %f allocs want %d", n, got, want) + } + if cap(r) != wantCap { + t.Errorf("for size %d, got capacity %d want %d", n, cap(r), wantCap) + } + } +} + +func TestAppendGeneric(t *testing.T) { + type I *int + r := testAppendGeneric[I](100) + if len(r) != 100 { + t.Errorf("bad length") + } +} + +//go:noinline +func testAppendGeneric[E any](n int) []E { + var r []E + var z E + for range n { + r = append(r, z) + } + return r +} + +func appendSomeBytes(r []byte, s []byte) []byte { + for _, b := range s { + r = append(r, b) + } + return r +} + +func TestAppendOfArg(t *testing.T) { + r := make([]byte, 24) + for i := 0; i < 24; i++ { + r[i] = byte(i) + } + appendSomeBytes(r, []byte{25, 26, 27}) + // Do the same thing, trying to overwrite any + // stack-allocated buffers used above. + s := make([]byte, 24) + for i := 0; i < 24; i++ { + s[i] = 99 + } + appendSomeBytes(s, []byte{99, 99, 99}) + // Check that we still have the right data. + for i, b := range r { + if b != byte(i) { + t.Errorf("r[%d]=%d, want %d", i, b, byte(i)) + } + } + +} + +func BenchmarkAppendInLoop(b *testing.B) { + for _, size := range []int{0, 1, 8, 16, 32, 64, 128} { + b.Run(fmt.Sprintf("%d", size), + func(b *testing.B) { + b.ReportAllocs() + for b.Loop() { + byteSlice(size) + } + }) + } +} + +func TestMoveToHeapEarly(t *testing.T) { + // Just checking that this compiles. 
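+	// The assignment y := x below causes the move-to-heap to be
+	// emitted in the function entry block, before any of the appends.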
+ var x []int + y := x // causes a move2heap in the entry block + for range 5 { + x = append(x, 5) + } + _ = y +} + +func TestMoveToHeapCap(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = cap(s) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func runit(f func()) { + f() +} + +func TestMoveToHeapClosure1(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + runit(func() { + c = cap(s) + }) + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} +func TestMoveToHeapClosure2(t *testing.T) { + var c int + r := func() []byte { + var s []byte + for i := range 10 { + s = append(s, byte(i)) + } + c = func() int { + return cap(s) + }() + return s + }() + if c != cap(r) { + t.Errorf("got cap=%d, want %d", c, cap(r)) + } + sinkSlice = r +} + +//go:noinline +func buildClosure(t *testing.T) ([]byte, func()) { + var s []byte + for i := range 20 { + s = append(s, byte(i)) + } + c := func() { + for i, b := range s { + if b != byte(i) { + t.Errorf("s[%d]=%d, want %d", i, b, i) + } + } + } + return s, c +} + +func TestMoveToHeapClosure3(t *testing.T) { + _, f := buildClosure(t) + overwriteStack(0) + f() +} + +//go:noinline +func overwriteStack(n int) uint64 { + var x [100]uint64 + for i := range x { + x[i] = 0xabcdabcdabcdabcd + } + return x[n] +} + +var sinkSlice []byte diff --git a/test/codegen/append.go b/test/codegen/append.go new file mode 100644 index 00000000000..0e58a48c458 --- /dev/null +++ b/test/codegen/append.go @@ -0,0 +1,190 @@ +// asmcheck + +// Copyright 2025 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+
+package codegen
+
+func Append1(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append2(n int) (r []int) {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return
+}
+
+func Append3(n int) (r []int) {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append4(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	println(cap(r))
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append5(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	useSlice(r)
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append6(n int) []*int {
+	var r []*int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, &i)
+	}
+	// amd64:`.*moveSliceNoCap`
+	return r
+}
+
+func Append7(n int) []*int {
+	var r []*int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, &i)
+	}
+	println(cap(r))
+	// amd64:`.*moveSlice`
+	return r
+}
+
+func Append8(n int, p *[]int) {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	*p = r
+}
+
+func Append9(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	println(len(r))
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append10(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	println(r[3])
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append11(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	r = r[3:5]
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append12(n int) []int {
+	var r []int
+	r = nil
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append13(n int) []int {
+	var r []int
+	r, r = nil, nil
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append14(n int) []int {
+	var r []int
+	r = []int{3, 4, 5}
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append15(n int) []int {
+	r := []int{3, 4, 5}
+	for i := range n {
+		// amd64:`.*growsliceBuf`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoScan`
+	return r
+}
+
+func Append16(r []int, n int) []int {
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+func Append17(n int) []int {
+	var r []int
+	for i := range n {
+		// amd64:`.*growslice`
+		r = append(r, i)
+	}
+	for i, x := range r {
+		println(i, x)
+	}
+	// amd64:`.*moveSliceNoCapNoScan`
+	return r
+}
+
+//go:noinline
+func useSlice(s []int) {
+}
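
For readers following the codegen checks above, here is a hand-written, source-level sketch of the idea these tests exercise: append into a small stack buffer while the slice stays purely local, then copy exactly the used length to the heap at the point where the slice escapes. This is illustrative only, not the compiler's actual lowering; the 32-byte buffer size is taken from the comment in TestAppendByteInLoop, the function and variable names are made up, and the sketch does not reproduce the exact allocation counts or capacities the tests assert.

package main

// byteSliceModel models what the compiled byteSlice effectively does:
// grow into a stack buffer while the slice is local, and move the
// result to the heap only at the escape point (the return).
func byteSliceModel(n int) []byte {
	var buf [32]byte // stack scratch space, analogous to growsliceBuf's buffer
	r := buf[:0]
	for i := 0; i < n; i++ {
		if len(r) == cap(r) {
			// Buffer (or current backing) is full: fall back to the
			// ordinary heap growth path, abandoning the stack buffer.
			r = append(r, byte(i))
			continue
		}
		r = r[:len(r)+1]
		r[len(r)-1] = byte(i)
	}
	if len(r) == 0 {
		return nil
	}
	// Analogous to moveSliceNoCapNoScan: the result escapes, so copy it
	// into a right-sized heap backing store; buf itself never escapes.
	out := make([]byte, len(r))
	copy(out, r)
	return out
}

func main() {
	s := byteSliceModel(10)
	println(len(s), cap(s), s[9])
}

In the actual lowering, the loop keeps growing into the same stack buffer via growsliceBuf and the single moveSlice* call at the escape point does the final copy, which is why TestAppendByteInLoop expects one heap allocation for lengths that fit the buffered path and a capacity rounded up to a malloc size class.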