[dev.simd] all: merge master (ca37d24) into dev.simd

Conflicts:

- src/cmd/compile/internal/typecheck/builtin.go

Merge List:

+ 2025-11-20 ca37d24e0b net/http: drop unused "broken" field from persistConn
+ 2025-11-20 4b740af56a cmd/internal/obj/x86: handle global reference in From3 in dynlink mode
+ 2025-11-20 790384c6c2 spec: adjust rule for type parameter on RHS of alias declaration
+ 2025-11-20 a49b0302d0 net/http: correctly close fake net.Conns
+ 2025-11-20 32f5aadd2f cmd/compile: stack allocate backing stores during append
+ 2025-11-20 a18aff8057 runtime: select GC mark workers during start-the-world
+ 2025-11-20 829779f4fe runtime: split findRunnableGCWorker in two
+ 2025-11-20 ab59569099 go/version: use "custom" as an example of a version suffix
+ 2025-11-19 c4bb9653ba cmd/compile: Implement LoweredZeroLoop with LSX Instruction on loong64
+ 2025-11-19 7f2ae21fb4 cmd/internal/obj/loong64: add MULW.D.W[U] instructions
+ 2025-11-19 a2946f2385 crypto: add Encapsulator and Decapsulator interfaces
+ 2025-11-19 6b83bd7146 crypto/ecdh: add KeyExchanger interface
+ 2025-11-19 4fef9f8b55 go/types, types2: fix object path for grouped declaration statements
+ 2025-11-19 33529db142 spec: escape double-ampersands
+ 2025-11-19 dc42565a20 cmd/compile: fix control flow for unsigned divisions proof relations
+ 2025-11-19 e64023dcbf cmd/compile: cleanup useless if statement in prove
+ 2025-11-19 2239520d1c test: go fmt prove.go tests
+ 2025-11-19 489d3dafb7 math: switch s390x math.Pow to generic implementation
+ 2025-11-18 8c41a482f9 runtime: add dlog.hexdump
+ 2025-11-18 e912618bd2 runtime: add hexdumper
+ 2025-11-18 2cf9d4b62f Revert "net/http: do not discard body content when closing it within request handlers"
+ 2025-11-18 4d0658bb08 cmd/compile: prefer fixed registers for values
+ 2025-11-18 ba634ca5c7 cmd/compile: fold boolean NOT into branches
+ 2025-11-18 8806d53c10 cmd/link: align sections, not symbols after DWARF compress
+ 2025-11-18 c93766007d runtime: do not print recovered when double panic with the same value
+ 2025-11-18 9859b43643 cmd/asm,cmd/compile,cmd/internal/obj/riscv: use compressed instructions on riscv64
+ 2025-11-17 b9ef0633f6 cmd/internal/sys,internal/goarch,runtime: enable the use of compressed instructions on riscv64
+ 2025-11-17 a087dea869 debug/elf: sync new loong64 relocation types up to LoongArch ELF psABI v20250521
+ 2025-11-17 e1a12c781f cmd/compile: use 32x32->64 multiplies on arm64
+ 2025-11-17 6caab99026 runtime: relax TestMemoryLimit on darwin a bit more
+ 2025-11-17 eda2e8c683 runtime: clear frame pointer at thread entry points
+ 2025-11-17 6919858338 runtime: rename findrunnable references to findRunnable
+ 2025-11-17 8e734ec954 go/ast: fix BasicLit.End position for raw strings containing \r
+ 2025-11-17 592775ec7d crypto/mlkem: avoid a few unnecessary inverse NTT calls
+ 2025-11-17 590cf18daf crypto/mlkem/mlkemtest: add derandomized Encapsulate768/1024
+ 2025-11-17 c12c337099 cmd/compile: teach prove about subtract idioms
+ 2025-11-17 bc15963813 cmd/compile: clean up prove pass
+ 2025-11-17 1297fae708 go/token: add (*File).End method
+ 2025-11-17 65c09eafdf runtime: hoist invariant code out of heapBitsSmallForAddrInline
+ 2025-11-17 594129b80c internal/runtime/maps: update doc for table.Clear
+ 2025-11-15 c58d075e9a crypto/rsa: deprecate PKCS#1 v1.5 encryption
+ 2025-11-14 d55ecea9e5 runtime: usleep before stealing runnext only if not in syscall
+ 2025-11-14 410ef44f00 cmd: update x/tools to 59ff18c
+ 2025-11-14 50128a2154 runtime: support runtime.freegc in size-specialized mallocs for noscan objects
+ 2025-11-14 c3708350a4 cmd/go: tests: rename git-min-vers->git-sha256
+ 2025-11-14 aea881230d std: fix printf("%q", int) mistakes
+ 2025-11-14 120f1874ef runtime: add more precise test of assist credit handling for runtime.freegc
+ 2025-11-14 fecfcaa4f6 runtime: add runtime.freegc to reduce GC work
+ 2025-11-14 5a347b775e runtime: set GOEXPERIMENT=runtimefreegc to disabled by default
+ 2025-11-14 1a03d0db3f runtime: skip tests for GOEXPERIMENT=arenas that do not handle clobberfree=1
+ 2025-11-14 cb0d9980f5 net/http: do not discard body content when closing it within request handlers
+ 2025-11-14 03ed43988f cmd/compile: allow multi-field structs to be stored directly in interfaces
+ 2025-11-14 1bb1f2bf0c runtime: put AddCleanup cleanup arguments in their own allocation
+ 2025-11-14 9fd2e44439 runtime: add AddCleanup benchmark
+ 2025-11-14 80c91eedbb runtime: ensure weak handles end up in their own allocation
+ 2025-11-14 7a8d0b5d53 runtime: add debug mode to extend _Grunning-without-P windows
+ 2025-11-14 710abf74da internal/runtime/cgobench: add Go function call benchmark for comparison
+ 2025-11-14 b24aec598b doc, cmd/internal/obj/riscv: document the riscv64 assembler
+ 2025-11-14 a0e738c657 cmd/compile/internal: remove incorrect riscv64 SLTI rule
+ 2025-11-14 2cdcc4150b cmd/compile: fold negation into multiplication
+ 2025-11-14 b57962b7c7 bytes: fix panic in bytes.Buffer.Peek
+ 2025-11-14 0a569528ea cmd/compile: optimize comparisons with single bit difference
+ 2025-11-14 1e5e6663e9 cmd/compile: remove unnecessary casts and types from riscv64 rules
+ 2025-11-14 ddd8558e61 go/types, types2: swap object.color for Checker.objPathIdx
+ 2025-11-14 9daaab305c cmd/link/internal/ld: make runtime.buildVersion with experiments valid
+ 2025-11-13 d50a571ddf test: fix tests to work with sizespecializedmalloc turned off
+ 2025-11-13 704f841eab cmd/trace: annotation proc start/stop with thread and proc always
+ 2025-11-13 17a02b9106 net/http: remove unused isLitOrSingle and isNotToken
+ 2025-11-13 ff61991aed cmd/go: fix flaky TestScript/mod_get_direct
+ 2025-11-13 129d0cb543 net/http/cgi: accept INCLUDED as protocol for server side includes
+ 2025-11-13 77c5130100 go/types: minor simplification
+ 2025-11-13 7601cd3880 go/types: generate cycles.go
+ 2025-11-13 7a372affd9 go/types, types2: rename definedType to declaredType and clarify docs

Change-Id: Ibaa9bdb982364892f80e511c1bb12661fcd5fb86
Cherry Mui, 2025-11-20 14:40:43 -05:00
commit e3d4645693
347 changed files with 10387 additions and 2812 deletions

api/next/73627.txt

@ -0,0 +1,2 @@
pkg crypto/mlkem/mlkemtest, func Encapsulate1024(*mlkem.EncapsulationKey1024, []uint8) ([]uint8, []uint8, error) #73627
pkg crypto/mlkem/mlkemtest, func Encapsulate768(*mlkem.EncapsulationKey768, []uint8) ([]uint8, []uint8, error) #73627

api/next/75300.txt

@ -0,0 +1,12 @@
pkg crypto, type Decapsulator interface { Decapsulate, Encapsulator } #75300
pkg crypto, type Decapsulator interface, Decapsulate([]uint8) ([]uint8, error) #75300
pkg crypto, type Decapsulator interface, Encapsulator() Encapsulator #75300
pkg crypto, type Encapsulator interface { Bytes, Encapsulate } #75300
pkg crypto, type Encapsulator interface, Bytes() []uint8 #75300
pkg crypto, type Encapsulator interface, Encapsulate() ([]uint8, []uint8) #75300
pkg crypto/ecdh, type KeyExchanger interface { Curve, ECDH, PublicKey } #75300
pkg crypto/ecdh, type KeyExchanger interface, Curve() Curve #75300
pkg crypto/ecdh, type KeyExchanger interface, ECDH(*PublicKey) ([]uint8, error) #75300
pkg crypto/ecdh, type KeyExchanger interface, PublicKey() *PublicKey #75300
pkg crypto/mlkem, method (*DecapsulationKey1024) Encapsulator() crypto.Encapsulator #75300
pkg crypto/mlkem, method (*DecapsulationKey768) Encapsulator() crypto.Encapsulator #75300
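A minimal usage sketch, not part of the commit: it assumes the Encapsulate return order matches crypto/mlkem's (shared key, then ciphertext) and compiles only against a toolchain that includes these new interfaces.

package kemsketch

import "crypto"

// roundTrip runs one KEM exchange through the new abstract interfaces.
// The shared secret computed by the sender should equal the one the
// receiver recovers from the ciphertext.
func roundTrip(dk crypto.Decapsulator) (shared, recovered []byte, err error) {
	ek := dk.Encapsulator()        // public encapsulation key for dk
	shared, ct := ek.Encapsulate() // assumed order: shared secret, ciphertext
	recovered, err = dk.Decapsulate(ct)
	return shared, recovered, err
}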

api/next/75302.txt

@ -0,0 +1,4 @@
pkg crypto/rsa, func DecryptPKCS1v15 //deprecated #75302
pkg crypto/rsa, func DecryptPKCS1v15SessionKey //deprecated #75302
pkg crypto/rsa, func EncryptPKCS1v15 //deprecated #75302
pkg crypto/rsa, type PKCS1v15DecryptOptions //deprecated #75302

api/next/75562.txt

@ -0,0 +1,38 @@
pkg debug/elf, const R_LARCH_TLS_DESC32 = 13 #75562
pkg debug/elf, const R_LARCH_TLS_DESC32 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC64 = 14 #75562
pkg debug/elf, const R_LARCH_TLS_DESC64 R_LARCH #75562
pkg debug/elf, const R_LARCH_CALL36 = 110 #75562
pkg debug/elf, const R_LARCH_CALL36 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PC_HI20 = 111 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PC_HI20 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PC_LO12 = 112 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PC_LO12 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_PC_LO20 = 113 #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_PC_LO20 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_PC_HI12 = 114 #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_PC_HI12 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_HI20 = 115 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_HI20 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_LO12 = 116 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_LO12 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_LO20 = 117 #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_LO20 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_HI12 = 118 #75562
pkg debug/elf, const R_LARCH_TLS_DESC64_HI12 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_LD = 119 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_LD R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_CALL = 120 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_CALL R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_LE_HI20_R = 121 #75562
pkg debug/elf, const R_LARCH_TLS_LE_HI20_R R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_LE_ADD_R = 122 #75562
pkg debug/elf, const R_LARCH_TLS_LE_ADD_R R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_LE_LO12_R = 123 #75562
pkg debug/elf, const R_LARCH_TLS_LE_LO12_R R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_LD_PCREL20_S2 = 124 #75562
pkg debug/elf, const R_LARCH_TLS_LD_PCREL20_S2 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_GD_PCREL20_S2 = 125 #75562
pkg debug/elf, const R_LARCH_TLS_GD_PCREL20_S2 R_LARCH #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PCREL20_S2 = 126 #75562
pkg debug/elf, const R_LARCH_TLS_DESC_PCREL20_S2 R_LARCH #75562

api/next/75849.txt

@ -0,0 +1 @@
pkg go/token, method (*File) End() Pos #75849

api/next/76031.txt

@ -0,0 +1 @@
pkg go/ast, type BasicLit struct, ValueEnd token.Pos #76031

doc/asm.html

@ -1039,6 +1039,12 @@ The value of <code>GOMIPS64</code> environment variable (<code>hardfloat</code>
<code>GOMIPS64_hardfloat</code> or <code>GOMIPS64_softfloat</code>.
</p>
<h3 id="riscv64">RISCV64</h3>
<p>
Reference: <a href="/pkg/cmd/internal/obj/riscv">Go RISCV64 Assembly Instructions Reference Manual</a>
</p>
<h3 id="unsupported_opcodes">Unsupported opcodes</h3>
<p>

doc/go_spec.html

@ -1,6 +1,6 @@
<!--{
"Title": "The Go Programming Language Specification",
"Subtitle": "Language version go1.26 (Nov 12, 2025)",
"Subtitle": "Language version go1.26 (Nov 18, 2025)",
"Path": "/ref/spec"
}-->
@ -2487,11 +2487,15 @@ type set[P comparable] = map[P]bool
</pre>
<p>
In an alias declaration the given type cannot be a type parameter.
In an alias declaration the given type cannot be a type parameter declared in the same declaration.
</p>
<pre>
type A[P any] = P // illegal: P is a type parameter
type A[P any] = P // illegal: P is a type parameter declared in the declaration of A
func f[P any]() {
type A = P // ok: P is a type parameter declared by the enclosing function
}
</pre>
<h4 id="Type_definitions">Type definitions</h4>
@ -2601,8 +2605,8 @@ In a type definition the given type cannot be a type parameter.
<pre>
type T[P any] P // illegal: P is a type parameter
func f[T any]() {
type L T // illegal: T is a type parameter declared by the enclosing function
func f[P any]() {
type L P // illegal: P is a type parameter declared by the enclosing function
}
</pre>
@ -4857,7 +4861,7 @@ For instance, <code>x / y * z</code> is the same as <code>(x / y) * z</code>.
x &lt;= f() // x &lt;= f()
^a &gt;&gt; b // (^a) >> b
f() || g() // f() || g()
x == y+1 &amp;&amp; &lt;-chanInt &gt; 0 // (x == (y+1)) && ((&lt;-chanInt) > 0)
x == y+1 &amp;&amp; &lt;-chanInt &gt; 0 // (x == (y+1)) &amp;&amp; ((&lt;-chanInt) > 0)
</pre>
@ -6867,7 +6871,7 @@ type Tree[K cmp.Ordered, V any] struct {
}
func (t *Tree[K, V]) walk(yield func(key K, val V) bool) bool {
return t == nil || t.left.walk(yield) && yield(t.key, t.value) && t.right.walk(yield)
return t == nil || t.left.walk(yield) &amp;&amp; yield(t.key, t.value) &amp;&amp; t.right.walk(yield)
}
func (t *Tree[K, V]) Walk(yield func(key K, val V) bool) {

@ -0,0 +1,2 @@
The new [Encapsulator] and [Decapsulator] interfaces allow accepting abstract
KEM encapsulation or decapsulation keys.

@ -0,0 +1,2 @@
The new [KeyExchanger] interface, implemented by [PrivateKey], makes it possible
to accept abstract ECDH private keys, e.g. those implemented in hardware.

@ -0,0 +1,3 @@
The new [DecapsulationKey768.Encapsulator] and
[DecapsulationKey1024.Encapsulator] methods implement the new
[crypto.Decapsulator] interface.

@ -0,0 +1,3 @@
The new [crypto/mlkem/mlkemtest] package exposes the [Encapsulate768] and
[Encapsulate1024] functions which implement derandomized ML-KEM encapsulation,
for use with known-answer tests.

@ -0,0 +1,2 @@
Unsafe PKCS #1 v1.5 encryption padding (implemented by [EncryptPKCS1v15],
[DecryptPKCS1v15], and [DecryptPKCS1v15SessionKey]) is now deprecated.

@ -0,0 +1,4 @@
Additional `R_LARCH_*` constants from [LoongArch ELF psABI v20250521][laelf-20250521]
(global version v2.40) are defined for use with LoongArch systems.
[laelf-20250521]: https://github.com/loongson/la-abi-specs/blob/v2.40/laelf.adoc

@ -0,0 +1,5 @@
The new [BasicLit.ValueEnd] field records the precise end position of
a literal so that the [BasicLit.End] method can now always return the
correct answer. (Previously it was computed using a heuristic that was
incorrect for multi-line raw string literals in Windows source files,
due to removal of carriage returns.)
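For illustration only (not part of the commit), assuming the old heuristic computed the end as ValuePos plus len(Value):

package main

import "fmt"

func main() {
	src := "`a\r\nb`" // the literal as written in a Windows source file: 6 bytes
	val := "`a\nb`"   // BasicLit.Value after the scanner strips '\r': 5 bytes
	// An End() computed as ValuePos + len(Value) lands one byte short of the
	// closing backquote; the new ValueEnd field records the true position.
	fmt.Println(len(src), len(val)) // 6 5
}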

@ -0,0 +1 @@
The new [File.End] convenience method returns the file's end position.

src/bytes/buffer.go

@ -86,7 +86,7 @@ func (b *Buffer) Peek(n int) ([]byte, error) {
if b.Len() < n {
return b.buf[b.off:], io.EOF
}
return b.buf[b.off:n], nil
return b.buf[b.off : b.off+n], nil
}
// empty reports whether the unread portion of the buffer is empty.
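A standalone sketch of the off-by-offset bug fixed above (not part of the commit); off models Buffer.off after earlier reads:

package main

import "fmt"

// peekOld reproduces the pre-fix slicing buf[off:n]; peekNew uses the
// corrected buf[off : off+n].
func peekOld(buf []byte, off, n int) []byte { return buf[off:n] }
func peekNew(buf []byte, off, n int) []byte { return buf[off : off+n] }

func main() {
	b := []byte("helloworld")
	fmt.Printf("%q\n", peekNew(b, 5, 5)) // "world", the expected view
	fmt.Printf("%q\n", peekOld(b, 5, 5)) // "", the bug: the bound ignored the offset
	// For off > n, e.g. peekOld(b, 5, 3), the old slicing would even panic.
}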

src/bytes/buffer_test.go

@ -533,19 +533,25 @@ func TestReadString(t *testing.T) {
var peekTests = []struct {
buffer string
skip int
n int
expected string
err error
}{
{"", 0, "", nil},
{"aaa", 3, "aaa", nil},
{"foobar", 2, "fo", nil},
{"a", 2, "a", io.EOF},
{"", 0, 0, "", nil},
{"aaa", 0, 3, "aaa", nil},
{"foobar", 0, 2, "fo", nil},
{"a", 0, 2, "a", io.EOF},
{"helloworld", 4, 3, "owo", nil},
{"helloworld", 5, 5, "world", nil},
{"helloworld", 5, 6, "world", io.EOF},
{"helloworld", 10, 1, "", io.EOF},
}
func TestPeek(t *testing.T) {
for _, test := range peekTests {
buf := NewBufferString(test.buffer)
buf.Next(test.skip)
bytes, err := buf.Peek(test.n)
if string(bytes) != test.expected {
t.Errorf("expected %q, got %q", test.expected, bytes)
@ -553,8 +559,8 @@ func TestPeek(t *testing.T) {
if err != test.err {
t.Errorf("expected error %v, got %v", test.err, err)
}
if buf.Len() != len(test.buffer) {
t.Errorf("bad length after peek: %d, want %d", buf.Len(), len(test.buffer))
if buf.Len() != len(test.buffer)-test.skip {
t.Errorf("bad length after peek: %d, want %d", buf.Len(), len(test.buffer)-test.skip)
}
}
}

@ -169,3 +169,8 @@ TEXT ·a34(SB), 0, $0-0
SHLXQ AX, CX, R15
ADDQ $1, R15
RET
// Ensure from3 get GOT-rewritten without errors.
TEXT ·a35(SB), 0, $0-0
VGF2P8AFFINEQB $0, runtime·writeBarrier(SB), Z1, Z1
RET

@ -212,6 +212,12 @@ lable2:
SRLV $32, R4, R5 // 85804500
SRLV $32, R4 // 84804500
// MULW.D.W[U] instructions
MULWVW R4, R5 // a5101f00
MULWVW R4, R5, R6 // a6101f00
MULWVWU R4, R5 // a5901f00
MULWVWU R4, R5, R6 // a6901f00
MASKEQZ R4, R5, R6 // a6101300
MASKNEZ R4, R5, R6 // a6901300

@ -29,6 +29,7 @@ var (
)
var DebugFlags struct {
CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"`
MayMoreStack string `help:"call named function before all stack growth checks"`
PCTab string `help:"print named pc-value table\nOne of: pctospadj, pctofile, pctoline, pctoinline, pctopcdata"`
}
@ -47,6 +48,8 @@ func init() {
flag.Var(objabi.NewDebugFlag(&DebugFlags, nil), "d", "enable debugging settings; try -d help")
objabi.AddVersionFlag() // -V
objabi.Flagcount("S", "print assembly and machine code", &PrintOut)
DebugFlags.CompressInstructions = 1
}
// MultiFlag allows setting a value multiple times to collect a list, as in -I=dir1 -I=dir2.

@ -40,6 +40,7 @@ func main() {
log.Fatalf("unrecognized architecture %s", GOARCH)
}
ctxt := obj.Linknew(architecture.LinkArch)
ctxt.CompressInstructions = flags.DebugFlags.CompressInstructions != 0
ctxt.Debugasm = flags.PrintOut
ctxt.Debugvlog = flags.DebugV
ctxt.Flag_dynlink = *flags.Dynlink

@ -20,6 +20,7 @@ type DebugFlags struct {
Append int `help:"print information about append compilation"`
Checkptr int `help:"instrument unsafe pointer conversions\n0: instrumentation disabled\n1: conversions involving unsafe.Pointer are instrumented\n2: conversions to unsafe.Pointer force heap allocation" concurrent:"ok"`
Closure int `help:"print information about closure compilation"`
CompressInstructions int `help:"use compressed instructions when possible (if supported by architecture)"`
Converthash string `help:"hash value for use in debugging changes to platform-dependent float-to-[u]int conversion" concurrent:"ok"`
Defer int `help:"print information about defer compilation"`
DisableNil int `help:"disable nil checks" concurrent:"ok"`

@ -177,6 +177,7 @@ func ParseFlags() {
Flag.WB = true
Debug.ConcurrentOk = true
Debug.CompressInstructions = 1
Debug.MaxShapeLen = 500
Debug.AlignHot = 1
Debug.InlFuncsWithClosures = 1
@ -299,6 +300,7 @@ func ParseFlags() {
}
parseSpectre(Flag.Spectre) // left as string for RecordFlags
Ctxt.CompressInstructions = Debug.CompressInstructions != 0
Ctxt.Flag_shared = Ctxt.Flag_dynlink || Ctxt.Flag_shared
Ctxt.Flag_optimize = Flag.N == 0
Ctxt.Debugasm = int(Flag.S)

@ -44,6 +44,11 @@ func Funcs(fns []*ir.Func) {
*as.lhs = ir.BlankNode
*as.rhs = zero
}
if len(assigns) > 0 {
// k.Defn might be pointing at one of the
// assignments we're overwriting.
k.Defn = nil
}
}
}
}

@ -124,3 +124,21 @@ func parseLeaks(s string) leaks {
copy(l[:], s[4:])
return l
}
func ParseLeaks(s string) leaks {
return parseLeaks(s)
}
// Any reports whether the value flows anywhere at all.
func (l leaks) Any() bool {
// TODO: do mutator/callee matter?
if l.Heap() >= 0 || l.Mutator() >= 0 || l.Callee() >= 0 {
return true
}
for i := range numEscResults {
if l.Result(i) >= 0 {
return true
}
}
return false
}

@ -22,6 +22,7 @@ import (
"cmd/compile/internal/pkginit"
"cmd/compile/internal/reflectdata"
"cmd/compile/internal/rttype"
"cmd/compile/internal/slice"
"cmd/compile/internal/ssa"
"cmd/compile/internal/ssagen"
"cmd/compile/internal/staticinit"
@ -271,6 +272,8 @@ func Main(archInit func(*ssagen.ArchInfo)) {
base.Timer.Start("fe", "escapes")
escape.Funcs(typecheck.Target.Funcs)
slice.Funcs(typecheck.Target.Funcs)
loopvar.LogTransformations(transformed)
// Collect information for go:nowritebarrierrec

@ -192,6 +192,7 @@ type CallExpr struct {
IsDDD bool
GoDefer bool // whether this call is part of a go or defer statement
NoInline bool // whether this call must not be inlined
UseBuf bool // use stack buffer for backing store (OAPPEND only)
}
func NewCallExpr(pos src.XPos, op Op, fun Node, args []Node) *CallExpr {
@ -1280,3 +1281,28 @@ func MethodExprFunc(n Node) *types.Field {
base.Fatalf("unexpected node: %v (%v)", n, n.Op())
panic("unreachable")
}
// A MoveToHeapExpr takes a slice as input and moves it to the
// heap (by copying the backing store if it is not already
// on the heap).
type MoveToHeapExpr struct {
miniExpr
Slice Node
// An expression that evaluates to a *runtime._type
// that represents the slice element type.
RType Node
// If PreserveCapacity is true, the capacity of
// the resulting slice, and all of the elements in
// [len:cap], must be preserved.
// If PreserveCapacity is false, the resulting
// slice may have any capacity >= len, with any
// elements in the resulting [len:cap] range zeroed.
PreserveCapacity bool
}
func NewMoveToHeapExpr(pos src.XPos, slice Node) *MoveToHeapExpr {
n := &MoveToHeapExpr{Slice: slice}
n.pos = pos
n.op = OMOVE2HEAP
return n
}
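A behavioral sketch of what the node denotes (not the runtime's implementation; inFrame is a hypothetical placeholder for the stack-frame check):

package irsketch

// moveToHeap models OMOVE2HEAP: copy s out of the current frame if needed.
func moveToHeap[T any](s []T, preserveCap bool, inFrame func([]T) bool) []T {
	if s == nil || !inFrame(s) {
		return s // already heap-backed or backed by a parent frame
	}
	if preserveCap {
		// Keep cap and the elements in [len:cap].
		s2 := make([]T, len(s), cap(s))
		copy(s2[:cap(s2)], s[:cap(s)])
		return s2
	}
	// Any capacity >= len is fine; the tail may be zeroed.
	return append(make([]T, 0, len(s)), s...)
}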

@ -574,7 +574,7 @@ func exprFmt(n Node, s fmt.State, prec int) {
// Special case for rune constants.
if typ == types.RuneType || typ == types.UntypedRune {
if x, ok := constant.Uint64Val(val); ok && x <= utf8.MaxRune {
fmt.Fprintf(s, "%q", x)
fmt.Fprintf(s, "%q", rune(x))
return
}
}

@ -43,7 +43,7 @@ type Name struct {
Func *Func // TODO(austin): nil for I.M
Offset_ int64
val constant.Value
Opt any // for use by escape analysis
Opt any // for use by escape or slice analysis
Embed *[]Embed // list of embedded files, for ONAME var
// For a local variable (not param) or extern, the initializing assignment (OAS or OAS2).

@ -293,6 +293,7 @@ const (
OLINKSYMOFFSET // offset within a name
OJUMPTABLE // A jump table structure for implementing dense expression switches
OINTERFACESWITCH // A type switch with interface cases
OMOVE2HEAP // Promote a stack-backed slice to heap
// opcodes for generics
ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter)

@ -1175,6 +1175,34 @@ func (n *MakeExpr) editChildrenWithHidden(edit func(Node) Node) {
}
}
func (n *MoveToHeapExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *MoveToHeapExpr) copy() Node {
c := *n
c.init = copyNodes(c.init)
return &c
}
func (n *MoveToHeapExpr) doChildren(do func(Node) bool) bool {
if doNodes(n.init, do) {
return true
}
if n.Slice != nil && do(n.Slice) {
return true
}
return false
}
func (n *MoveToHeapExpr) doChildrenWithHidden(do func(Node) bool) bool {
return n.doChildren(do)
}
func (n *MoveToHeapExpr) editChildren(edit func(Node) Node) {
editNodes(n.init, edit)
if n.Slice != nil {
n.Slice = edit(n.Slice).(Node)
}
}
func (n *MoveToHeapExpr) editChildrenWithHidden(edit func(Node) Node) {
n.editChildren(edit)
}
func (n *Name) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *NilExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }

@ -151,18 +151,19 @@ func _() {
_ = x[OLINKSYMOFFSET-140]
_ = x[OJUMPTABLE-141]
_ = x[OINTERFACESWITCH-142]
_ = x[ODYNAMICDOTTYPE-143]
_ = x[ODYNAMICDOTTYPE2-144]
_ = x[ODYNAMICTYPE-145]
_ = x[OTAILCALL-146]
_ = x[OGETG-147]
_ = x[OGETCALLERSP-148]
_ = x[OEND-149]
_ = x[OMOVE2HEAP-143]
_ = x[ODYNAMICDOTTYPE-144]
_ = x[ODYNAMICDOTTYPE2-145]
_ = x[ODYNAMICTYPE-146]
_ = x[OTAILCALL-147]
_ = x[OGETG-148]
_ = x[OGETCALLERSP-149]
_ = x[OEND-150]
}
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND"
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHMOVE2HEAPDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND"
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935}
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 878, 892, 907, 918, 926, 930, 941, 944}
func (i Op) String() string {
if i >= Op(len(_Op_index)-1) {

@ -42,6 +42,7 @@ func (*Decl) isStmt() {}
type Stmt interface {
Node
isStmt()
PtrInit() *Nodes
}
// A miniStmt is a miniNode with extra fields common to statements.

@ -29,6 +29,11 @@ type symsStruct struct {
GCWriteBarrier [8]*obj.LSym
Goschedguarded *obj.LSym
Growslice *obj.LSym
GrowsliceBuf *obj.LSym
MoveSlice *obj.LSym
MoveSliceNoScan *obj.LSym
MoveSliceNoCap *obj.LSym
MoveSliceNoCapNoScan *obj.LSym
InterfaceSwitch *obj.LSym
MallocGC *obj.LSym
MallocGCSmallNoScan [27]*obj.LSym

@ -575,6 +575,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpLOONG64LoweredZeroLoop:
ptrReg := v.Args[0].Reg()
countReg := v.RegTmp()
flagReg := int16(loong64.REGTMP)
var off int64
n := v.AuxInt
loopSize := int64(64)
@ -587,58 +588,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// vs
// 16 instructions in the straightline code
// Might as well use straightline code.
v.Fatalf("ZeroLoop size tool small %d", n)
v.Fatalf("ZeroLoop size too small %d", n)
}
// Put iteration count in a register.
// MOVV $n/loopSize, countReg
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = n / loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
cntInit := p
// MOVBU ir.Syms.Loong64HasLSX, flagReg
// BNE flagReg, lsxInit
// genericInit:
// for off = 0; off < loopSize; off += 8 {
// zero8(s, ptrReg, off)
// }
// ADDV $loopSize, ptrReg
// SUBV $1, countReg
// BNE countReg, genericInit
// JMP tail
// lsxInit:
// VXORV V31, V31, V31, v31 = 0
// for off = 0; off < loopSize; off += 16 {
// zero16(s, V31, ptrReg, off)
// }
// ADDV $loopSize, ptrReg
// SUBV $1, countReg
// BNE countReg, lsxInit
// tail:
// n %= loopSize
// for off = 0; n >= 8; off += 8, n -= 8 {
// zero8(s, ptrReg, off)
// }
//
// if n != 0 {
// zero8(s, ptrReg, off+n-8)
// }
// Zero loopSize bytes starting at ptrReg.
for range loopSize / 8 {
// MOVV ZR, off(ptrReg)
p1 := s.Prog(loong64.AMOVV)
p1.From.Type = obj.TYPE_CONST
p1.From.Offset = n / loopSize
p1.To.Type = obj.TYPE_REG
p1.To.Reg = countReg
p2 := s.Prog(loong64.AMOVBU)
p2.From.Type = obj.TYPE_MEM
p2.From.Name = obj.NAME_EXTERN
p2.From.Sym = ir.Syms.Loong64HasLSX
p2.To.Type = obj.TYPE_REG
p2.To.Reg = flagReg
p3 := s.Prog(loong64.ABNE)
p3.From.Type = obj.TYPE_REG
p3.From.Reg = flagReg
p3.To.Type = obj.TYPE_BRANCH
for off = 0; off < loopSize; off += 8 {
zero8(s, ptrReg, off)
off += 8
}
// Increment ptrReg by loopSize.
// ADDV $loopSize, ptrReg
p = s.Prog(loong64.AADDV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = ptrReg
p4 := s.Prog(loong64.AADDV)
p4.From.Type = obj.TYPE_CONST
p4.From.Offset = loopSize
p4.To.Type = obj.TYPE_REG
p4.To.Reg = ptrReg
// Decrement loop count.
// SUBV $1, countReg
p = s.Prog(loong64.ASUBV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 1
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
p5 := s.Prog(loong64.ASUBV)
p5.From.Type = obj.TYPE_CONST
p5.From.Offset = 1
p5.To.Type = obj.TYPE_REG
p5.To.Reg = countReg
// Jump to loop header if we're not done yet.
// BNE countReg, loop header
p = s.Prog(loong64.ABNE)
p.From.Type = obj.TYPE_REG
p.From.Reg = countReg
p.To.Type = obj.TYPE_BRANCH
p.To.SetTarget(cntInit.Link)
p6 := s.Prog(loong64.ABNE)
p6.From.Type = obj.TYPE_REG
p6.From.Reg = countReg
p6.To.Type = obj.TYPE_BRANCH
p6.To.SetTarget(p3.Link)
p7 := s.Prog(obj.AJMP)
p7.To.Type = obj.TYPE_BRANCH
p8 := s.Prog(loong64.AVXORV)
p8.From.Type = obj.TYPE_REG
p8.From.Reg = loong64.REG_V31
p8.To.Type = obj.TYPE_REG
p8.To.Reg = loong64.REG_V31
p3.To.SetTarget(p8)
for off = 0; off < loopSize; off += 16 {
zero16(s, loong64.REG_V31, ptrReg, off)
}
p9 := s.Prog(loong64.AADDV)
p9.From.Type = obj.TYPE_CONST
p9.From.Offset = loopSize
p9.To.Type = obj.TYPE_REG
p9.To.Reg = ptrReg
p10 := s.Prog(loong64.ASUBV)
p10.From.Type = obj.TYPE_CONST
p10.From.Offset = 1
p10.To.Type = obj.TYPE_REG
p10.To.Reg = countReg
p11 := s.Prog(loong64.ABNE)
p11.From.Type = obj.TYPE_REG
p11.From.Reg = countReg
p11.To.Type = obj.TYPE_BRANCH
p11.To.SetTarget(p8.Link)
p12 := s.Prog(obj.ANOP)
p7.To.SetTarget(p12)
// Multiples of the loop size are now done.
n %= loopSize
off = 0
// Write any fractional portion.
for n >= 8 {
for off = 0; n >= 8; off += 8 {
// MOVV ZR, off(ptrReg)
zero8(s, ptrReg, off)
off += 8
n -= 8
}
@ -1341,3 +1403,14 @@ func zero8(s *ssagen.State, reg int16, off int64) {
p.To.Reg = reg
p.To.Offset = off
}
// zero16 zeroes 16 bytes at reg+off.
func zero16(s *ssagen.State, regZero, regBase int16, off int64) {
// VMOVQ regZero, off(regBase)
p := s.Prog(loong64.AVMOVQ)
p.From.Type = obj.TYPE_REG
p.From.Reg = regZero
p.To.Type = obj.TYPE_MEM
p.To.Reg = regBase
p.To.Offset = off
}
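As an aside (not compiler code), the store schedule this lowering emits, written as plain Go over a byte slice; it assumes n >= loopSize, which the size check above enforces:

package loong64sketch

func zeroSchedule(mem []byte, n int64) {
	const loopSize = 64
	var off int64
	for range n / loopSize { // the unrolled loop body (8- or 16-byte stores)
		for i := int64(0); i < loopSize; i += 8 {
			clear(mem[off+i : off+i+8])
		}
		off += loopSize
	}
	n %= loopSize
	for ; n >= 8; n -= 8 { // 8-byte tail stores
		clear(mem[off : off+8])
		off += 8
	}
	if n != 0 { // final store overlaps the previous one by 8-n bytes
		clear(mem[off+n-8 : off+n])
	}
}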

@ -0,0 +1,455 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package slice
// This file implements a stack-allocation optimization
// for the backing store of slices.
//
// Consider the code:
//
// var s []int
// for i := range ... {
// s = append(s, i)
// }
// return s
//
// Some of the append operations will need to do an allocation
// by calling growslice. This will happen on the 1st, 2nd, 4th,
// 8th, etc. append calls. The allocations done by all but the
// last growslice call will then immediately be garbage.
//
// We'd like to avoid doing some of those intermediate
// allocations if possible.
//
// If we can determine that the "return s" statement is the
// *only* way that the backing store for s escapes, then we
// can rewrite the code to something like:
//
// var s []int
// for i := range N {
// s = append(s, i)
// }
// s = move2heap(s)
// return s
//
// Using the move2heap runtime function, which does:
//
// move2heap(s):
// If s is not backed by a stackframe-allocated
// backing store, return s. Otherwise, copy s
// to the heap and return the copy.
//
// Now we can treat the backing store of s allocated at the
// append site as not escaping. Previous stack allocation
// optimizations now apply, which can use a fixed-size
// stack-allocated backing store for s when appending.
// (See ../ssagen/ssa.go:(*state).append)
//
// It is tricky to do this optimization safely. To describe
// our analysis, we first define what an "exclusive" slice
// variable is.
//
// A slice variable (a variable of slice type) is called
// "exclusive" if, when it has a reference to a
// stackframe-allocated backing store, it is the only
// variable with such a reference.
//
// In other words, a slice variable is exclusive if
// any of the following holds:
// 1) It points to a heap-allocated backing store
// 2) It points to a stack-allocated backing store
// for any parent frame.
// 3) It is the only variable that references its
// backing store.
// 4) It is nil.
//
// The nice thing about exclusive slice variables is that
// it is always safe to do
// s = move2heap(s)
// whenever s is an exclusive slice variable. Because no
// one else has a reference to the backing store, no one
// else can tell that we moved the backing store from one
// location to another.
//
// Note that exclusiveness is a dynamic property. A slice
// variable may be exclusive during some parts of execution
// and not exclusive during others.
//
// The following operations set or preserve the exclusivity
// of a slice variable s:
// s = nil
// s = append(s, ...)
// s = s[i:j]
// ... = s[i]
// s[i] = ...
// f(s) where f does not escape its argument
// Other operations destroy exclusivity. A non-exhaustive list includes:
// x = s
// *p = s
// f(s) where f escapes its argument
// return s
// To err on the safe side, we whitelist exclusivity-preserving
// operations and we assume that any other operation that mentions s
// destroys its exclusivity.
//
// Our strategy is to move the backing store of s to the heap before
// any exclusive->nonexclusive transition. That way, s will only ever
// have a reference to a stack backing store while it is exclusive.
//
// move2heap for a variable s is implemented with:
// if s points to within the stack frame {
// s2 := make([]T, s.len, s.cap)
// copy(s2[:s.cap], s[:s.cap])
// s = s2
// }
// Note that in general we need to copy all of s[:cap(s)] when
// moving to the heap. As an optimization, we keep track of slice variables
// whose capacity, and the elements in s[len(s):cap(s)], are never accessed.
// For those slice variables, we can allocate to the next size class above
// the length, which saves memory and copying cost.
import (
"cmd/compile/internal/base"
"cmd/compile/internal/escape"
"cmd/compile/internal/ir"
"cmd/compile/internal/reflectdata"
)
func Funcs(all []*ir.Func) {
if base.Flag.N != 0 {
return
}
for _, fn := range all {
analyze(fn)
}
}
func analyze(fn *ir.Func) {
type sliceInfo struct {
// Slice variable.
s *ir.Name
// Count of uses that this pass understands.
okUses int32
// Count of all uses found.
allUses int32
// A place where the slice variable transitions from
// exclusive to nonexclusive.
// We could keep track of more than one, but one is enough for now.
// Currently, this can be either a return statement or
// an assignment.
// TODO: other possible transitions?
transition ir.Stmt
// Each s = append(s, ...) instance we found.
appends []*ir.CallExpr
// Weight of the number of s = append(s, ...) instances we found.
// The optimizations we do are only really useful if the total
// weight is at least 2. (Note: appends in loops have weight >= 2.)
appendWeight int
// Whether we ever do cap(s), or other operations that use cap(s)
// (possibly implicitly), like s[i:j].
capUsed bool
}
// Every variable (*ir.Name) that we are tracking will have
// a non-nil *sliceInfo in its Opt field.
haveLocalSlice := false
maxStackSize := int64(base.Debug.VariableMakeThreshold)
var namedRets []*ir.Name
for _, s := range fn.Dcl {
if !s.Type().IsSlice() {
continue
}
if s.Type().Elem().Size() > maxStackSize {
continue
}
if !base.VariableMakeHash.MatchPos(s.Pos(), nil) {
continue
}
s.Opt = &sliceInfo{s: s} // start tracking s
haveLocalSlice = true
if s.Class == ir.PPARAMOUT {
namedRets = append(namedRets, s)
}
}
if !haveLocalSlice {
return
}
// Keep track of loop depth while walking.
loopDepth := 0
// tracking returns the info for the slice variable if n is a slice
// variable that we're still considering, or nil otherwise.
tracking := func(n ir.Node) *sliceInfo {
if n == nil || n.Op() != ir.ONAME {
return nil
}
s := n.(*ir.Name)
if s.Opt == nil {
return nil
}
return s.Opt.(*sliceInfo)
}
// addTransition(i, loc) records that i.s experiences an exclusive->nonexclusive
// transition somewhere within loc.
addTransition := func(i *sliceInfo, loc ir.Stmt) {
if i.transition != nil {
// We only keep track of a single exclusive->nonexclusive transition
// for a slice variable. If we find more than one, give up.
// (More than one transition location would be fine, but we would
// start to get worried about introducing too much additional code.)
i.s.Opt = nil
return
}
i.transition = loc
}
// Examine an x = y assignment that occurs somewhere within statement stmt.
assign := func(x, y ir.Node, stmt ir.Stmt) {
if i := tracking(x); i != nil {
// s = y. Check for understood patterns for y.
if y == nil || y.Op() == ir.ONIL {
// s = nil is ok.
i.okUses++
} else if y.Op() == ir.OSLICELIT {
// s = []{...} is ok.
// Note: this reveals capacity. Should it?
i.okUses++
i.capUsed = true
} else if y.Op() == ir.OSLICE {
y := y.(*ir.SliceExpr)
if y.X == i.s {
// s = s[...:...] is ok
i.okUses += 2
i.capUsed = true
}
} else if y.Op() == ir.OAPPEND {
y := y.(*ir.CallExpr)
if y.Args[0] == i.s {
// s = append(s, ...) is ok
i.okUses += 2
i.appends = append(i.appends, y)
i.appendWeight += 1 + loopDepth
}
// TODO: s = append(nil, ...)?
}
// Note that technically s = make([]T, ...) preserves exclusivity, but
// we don't track that because we assume users who wrote that know
// better than the compiler does.
// TODO: figure out how to handle s = fn(..., s, ...)
// It would be nice to maintain exclusivity of s in this situation.
// But unfortunately, fn can return one of its other arguments, which
// may be a slice with a stack-allocated backing store other than s.
// (which may have preexisting references to its backing store).
//
// Maybe we could do it if s is the only argument?
}
if i := tracking(y); i != nil {
// ... = s
// Treat this as an exclusive->nonexclusive transition.
i.okUses++
addTransition(i, stmt)
}
}
var do func(ir.Node) bool
do = func(n ir.Node) bool {
if n == nil {
return false
}
switch n.Op() {
case ir.ONAME:
if i := tracking(n); i != nil {
// A use of a slice variable. Count it.
i.allUses++
}
case ir.ODCL:
n := n.(*ir.Decl)
if i := tracking(n.X); i != nil {
i.okUses++
}
case ir.OINDEX:
n := n.(*ir.IndexExpr)
if i := tracking(n.X); i != nil {
// s[i] is ok.
i.okUses++
}
case ir.OLEN:
n := n.(*ir.UnaryExpr)
if i := tracking(n.X); i != nil {
// len(s) is ok
i.okUses++
}
case ir.OCAP:
n := n.(*ir.UnaryExpr)
if i := tracking(n.X); i != nil {
// cap(s) is ok
i.okUses++
i.capUsed = true
}
case ir.OADDR:
n := n.(*ir.AddrExpr)
if n.X.Op() == ir.OINDEX {
n := n.X.(*ir.IndexExpr)
if i := tracking(n.X); i != nil {
// &s[i] is definitely a nonexclusive transition.
// (We need this case because s[i] is ok, but &s[i] is not.)
i.s.Opt = nil
}
}
case ir.ORETURN:
n := n.(*ir.ReturnStmt)
for _, x := range n.Results {
if i := tracking(x); i != nil {
i.okUses++
// We go exclusive->nonexclusive here
addTransition(i, n)
}
}
if len(n.Results) == 0 {
// Uses of named result variables are implicit here.
for _, x := range namedRets {
if i := tracking(x); i != nil {
addTransition(i, n)
}
}
}
case ir.OCALLFUNC:
n := n.(*ir.CallExpr)
for idx, arg := range n.Args {
if i := tracking(arg); i != nil {
if !argLeak(n, idx) {
// Passing s to a nonescaping arg is ok.
i.okUses++
i.capUsed = true
}
}
}
case ir.ORANGE:
// Range over slice is ok.
n := n.(*ir.RangeStmt)
if i := tracking(n.X); i != nil {
i.okUses++
}
case ir.OAS:
n := n.(*ir.AssignStmt)
assign(n.X, n.Y, n)
case ir.OAS2:
n := n.(*ir.AssignListStmt)
for i := range len(n.Lhs) {
assign(n.Lhs[i], n.Rhs[i], n)
}
case ir.OCLOSURE:
n := n.(*ir.ClosureExpr)
for _, v := range n.Func.ClosureVars {
do(v.Outer)
}
}
if n.Op() == ir.OFOR || n.Op() == ir.ORANGE {
// Note: loopDepth isn't really right for init portion
// of the for statement, but that's ok. Correctness
// does not depend on depth info.
loopDepth++
defer func() { loopDepth-- }()
}
// Check all the children.
ir.DoChildren(n, do)
return false
}
// Run the analysis over the whole body.
for _, stmt := range fn.Body {
do(stmt)
}
// Process accumulated info to find slice variables
// that we can allocate on the stack.
for _, s := range fn.Dcl {
if s.Opt == nil {
continue
}
i := s.Opt.(*sliceInfo)
s.Opt = nil
if i.okUses != i.allUses {
// Some use of i.s that we don't understand lurks. Give up.
continue
}
// At this point, we've decided that we *can* do
// the optimization.
if i.transition == nil {
// Exclusive for its whole lifetime. That means it
// didn't escape. We can already handle nonescaping
// slices without this pass.
continue
}
if i.appendWeight < 2 {
// This optimization only really helps if there is
// (dynamically) more than one append.
continue
}
// Commit point - at this point we've decided we *should*
// do the optimization.
// Insert a move2heap operation before the exclusive->nonexclusive
// transition.
move := ir.NewMoveToHeapExpr(i.transition.Pos(), i.s)
if i.capUsed {
move.PreserveCapacity = true
}
move.RType = reflectdata.AppendElemRType(i.transition.Pos(), i.appends[0])
move.SetType(i.s.Type())
move.SetTypecheck(1)
as := ir.NewAssignStmt(i.transition.Pos(), i.s, move)
as.SetTypecheck(1)
i.transition.PtrInit().Prepend(as)
// Note: we prepend because we need to put the move2heap
// operation first, before any other init work, as the transition
// might occur in the init work.
// Now that we've inserted a move2heap operation before every
// exclusive -> nonexclusive transition, appends can now use
// stack backing stores.
// (This is the whole point of this pass, to enable stack
// allocation of append backing stores.)
for _, a := range i.appends {
a.SetEsc(ir.EscNone)
if i.capUsed {
a.UseBuf = true
}
}
}
}
// argLeak reports whether the idx'th argument to the call n escapes anywhere
// (to the heap, another argument, a return value, etc.).
// If unknown, it returns true.
func argLeak(n *ir.CallExpr, idx int) bool {
if n.Op() != ir.OCALLFUNC {
return true
}
fn := ir.StaticCalleeName(ir.StaticValue(n.Fun))
if fn == nil {
return true
}
fntype := fn.Type()
if recv := fntype.Recv(); recv != nil {
if idx == 0 {
return escape.ParseLeaks(recv.Note).Any()
}
idx--
}
return escape.ParseLeaks(fntype.Params()[idx].Note).Any()
}
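The source-level shape the pass targets, as a sketch (not part of the commit):

package slicesketch

// collect keeps s exclusive through every append, so the pass can mark the
// appends as stack-allocatable and insert a move-to-heap at the transition.
func collect(n int) []int {
	var s []int
	for i := 0; i < n; i++ {
		s = append(s, i) // may grow into a fixed-size stack buffer
	}
	// Conceptually rewritten to: s = move2heap(s); return s
	return s // the lone exclusive->nonexclusive transition
}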

@ -156,6 +156,7 @@ func init() {
gp11sb = regInfo{inputs: []regMask{gpspsbg}, outputs: gponly}
gp21 = regInfo{inputs: []regMask{gp, gp}, outputs: gponly}
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly}
gp21sp2 = regInfo{inputs: []regMask{gp, gpsp}, outputs: gponly}
gp21sb = regInfo{inputs: []regMask{gpspsbg, gpsp}, outputs: gponly}
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}}
gp31shift = regInfo{inputs: []regMask{gp, gp, cx}, outputs: []regMask{gp}}
@ -361,7 +362,7 @@ func init() {
{name: "ADDQconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"},
{name: "SUBQ", argLength: 2, reg: gp21, asm: "SUBQ", resultInArg0: true, clobberFlags: true},
{name: "SUBQ", argLength: 2, reg: gp21sp2, asm: "SUBQ", resultInArg0: true, clobberFlags: true},
{name: "SUBL", argLength: 2, reg: gp21, asm: "SUBL", resultInArg0: true, clobberFlags: true},
{name: "SUBQconst", argLength: 1, reg: gp11, asm: "SUBQ", aux: "Int32", resultInArg0: true, clobberFlags: true},
{name: "SUBLconst", argLength: 1, reg: gp11, asm: "SUBL", aux: "Int32", resultInArg0: true, clobberFlags: true},

@ -573,6 +573,8 @@
(TBNZ [0] (GreaterThanF cc) yes no) => (FGT cc yes no)
(TBNZ [0] (GreaterEqualF cc) yes no) => (FGE cc yes no)
(TB(Z|NZ) [0] (XORconst [1] x) yes no) => (TB(NZ|Z) [0] x yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TST x y) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] x:(ANDconst [c] y)) yes no) && x.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTconst [c] y) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(AND x y)) yes no) && z.Uses == 1 => ((EQ|NE|LT|LE|GT|GE) (TSTW x y) yes no)
@ -1814,3 +1816,7 @@
(Select0 (Mul64uover x y)) => (MUL x y)
(Select1 (Mul64uover x y)) => (NotEqual (CMPconst (UMULH <typ.UInt64> x y) [0]))
// 32 mul 32 -> 64
(MUL r:(MOVWUreg x) s:(MOVWUreg y)) && r.Uses == 1 && s.Uses == 1 => (UMULL x y)
(MUL r:(MOVWreg x) s:(MOVWreg y)) && r.Uses == 1 && s.Uses == 1 => (MULL x y)
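The Go-level pattern those two rules match, sketched for reference (not part of the commit):

package arm64sketch

// A 64-bit product of widened 32-bit operands needs only one instruction:
func mulU(a, b uint32) uint64 { return uint64(a) * uint64(b) } // UMULL
func mulS(a, b int32) int64   { return int64(a) * int64(b) }   // MULL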

@ -743,9 +743,6 @@
(MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c]))
(MULV (NEGV x) (NEGV y)) => (MULV x y)
(ADDV x0 x1:(SLLVconst [c] y)) && x1.Uses == 1 && c > 0 && c <= 4 => (ADDshiftLLV x0 y [c])
// fold constant in ADDshift op

@ -388,6 +388,7 @@ func init() {
argLength: 2,
reg: regInfo{
inputs: []regMask{gp},
clobbers: buildReg("F31"),
clobbersArg0: true,
},
faultOnNilArg0: true,

@ -689,36 +689,36 @@
(MOVDnop (MOVDconst [c])) => (MOVDconst [c])
// Avoid unnecessary zero and sign extension when right shifting.
(SRAI <t> [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW <t> [int64(x)] y)
(SRLI <t> [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW <t> [int64(x)] y)
(SRAI [x] (MOVWreg y)) && x >= 0 && x <= 31 => (SRAIW [x] y)
(SRLI [x] (MOVWUreg y)) && x >= 0 && x <= 31 => (SRLIW [x] y)
// Replace right shifts that exceed size of signed type.
(SRAI <t> [x] (MOVBreg y)) && x >= 8 => (SRAI [63] (SLLI <t> [56] y))
(SRAI <t> [x] (MOVHreg y)) && x >= 16 => (SRAI [63] (SLLI <t> [48] y))
(SRAI <t> [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y)
(SRAI [x] (MOVWreg y)) && x >= 32 => (SRAIW [31] y)
// Eliminate right shifts that exceed size of unsigned type.
(SRLI <t> [x] (MOVBUreg y)) && x >= 8 => (MOVDconst <t> [0])
(SRLI <t> [x] (MOVHUreg y)) && x >= 16 => (MOVDconst <t> [0])
(SRLI <t> [x] (MOVWUreg y)) && x >= 32 => (MOVDconst <t> [0])
(SRLI [x] (MOVBUreg y)) && x >= 8 => (MOVDconst [0])
(SRLI [x] (MOVHUreg y)) && x >= 16 => (MOVDconst [0])
(SRLI [x] (MOVWUreg y)) && x >= 32 => (MOVDconst [0])
// Fold constant into immediate instructions where possible.
(ADD (MOVDconst <t> [val]) x) && is32Bit(val) && !t.IsPtr() => (ADDI [val] x)
(AND (MOVDconst [val]) x) && is32Bit(val) => (ANDI [val] x)
(OR (MOVDconst [val]) x) && is32Bit(val) => (ORI [val] x)
(XOR (MOVDconst [val]) x) && is32Bit(val) => (XORI [val] x)
(ROL x (MOVDconst [val])) => (RORI [int64(int8(-val)&63)] x)
(ROLW x (MOVDconst [val])) => (RORIW [int64(int8(-val)&31)] x)
(ROR x (MOVDconst [val])) => (RORI [int64(val&63)] x)
(RORW x (MOVDconst [val])) => (RORIW [int64(val&31)] x)
(SLL x (MOVDconst [val])) => (SLLI [int64(val&63)] x)
(SRL x (MOVDconst [val])) => (SRLI [int64(val&63)] x)
(SLLW x (MOVDconst [val])) => (SLLIW [int64(val&31)] x)
(SRLW x (MOVDconst [val])) => (SRLIW [int64(val&31)] x)
(SRA x (MOVDconst [val])) => (SRAI [int64(val&63)] x)
(SRAW x (MOVDconst [val])) => (SRAIW [int64(val&31)] x)
(SLT x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTI [val] x)
(SLTU x (MOVDconst [val])) && val >= -2048 && val <= 2047 => (SLTIU [val] x)
(ROL x (MOVDconst [val])) => (RORI [-val&63] x)
(ROLW x (MOVDconst [val])) => (RORIW [-val&31] x)
(ROR x (MOVDconst [val])) => (RORI [val&63] x)
(RORW x (MOVDconst [val])) => (RORIW [val&31] x)
(SLL x (MOVDconst [val])) => (SLLI [val&63] x)
(SLLW x (MOVDconst [val])) => (SLLIW [val&31] x)
(SRL x (MOVDconst [val])) => (SRLI [val&63] x)
(SRLW x (MOVDconst [val])) => (SRLIW [val&31] x)
(SRA x (MOVDconst [val])) => (SRAI [val&63] x)
(SRAW x (MOVDconst [val])) => (SRAIW [val&31] x)
(SLT x (MOVDconst [val])) && is12Bit(val) => (SLTI [val] x)
(SLTU x (MOVDconst [val])) && is12Bit(val) => (SLTIU [val] x)
// Replace negated left rotation with right rotation.
(ROL x (NEG y)) => (ROR x y)
@ -782,7 +782,7 @@
(SRAI [x] (MOVDconst [y])) => (MOVDconst [int64(y) >> uint32(x)])
// Combine doubling via addition with shift.
(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI <t> [c+1] x)
(SLLI <t> [c] (ADD x x)) && c < t.Size() * 8 - 1 => (SLLI [c+1] x)
(SLLI <t> [c] (ADD x x)) && c >= t.Size() * 8 - 1 => (MOVDconst [0])
// SLTI/SLTIU with constants.
@ -792,7 +792,6 @@
// SLTI/SLTIU with known outcomes.
(SLTI [x] (ANDI [y] _)) && y >= 0 && int64(y) < int64(x) => (MOVDconst [1])
(SLTIU [x] (ANDI [y] _)) && y >= 0 && uint64(y) < uint64(x) => (MOVDconst [1])
(SLTI [x] (ORI [y] _)) && y >= 0 && int64(y) >= int64(x) => (MOVDconst [0])
(SLTIU [x] (ORI [y] _)) && y >= 0 && uint64(y) >= uint64(x) => (MOVDconst [0])
// SLT/SLTU with known outcomes.

@ -97,8 +97,10 @@
// Helpers for expand calls
// Some of these are copied from generic.rules
(IMake _typ (StructMake val)) => (IMake _typ val)
(StructSelect [0] (IData x)) => (IData x)
(IMake _typ (StructMake ___)) => imakeOfStructMake(v)
(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x)
(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake)
(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0)
(StructSelect [i] x:(StructMake ___)) => x.Args[i]
@ -109,7 +111,7 @@
// More annoying case: (ArraySelect[0] (StructSelect[0] isAPtr))
// There, result of the StructSelect is an Array (not a pointer) and
// the pre-rewrite input to the ArraySelect is a struct, not a pointer.
(StructSelect [0] x) && x.Type.IsPtrShaped() => x
(StructSelect x) && x.Type.IsPtrShaped() => x
(ArraySelect [0] x) && x.Type.IsPtrShaped() => x
// These, too. Bits is bits.
@ -119,6 +121,7 @@
(Store _ (StructMake ___) _) => rewriteStructStore(v)
(IMake _typ (ArrayMake1 val)) => (IMake _typ val)
(ArraySelect (ArrayMake1 x)) => x
(ArraySelect [0] (IData x)) => (IData x)

@ -195,6 +195,11 @@
// Convert x * -1 to -x.
(Mul(8|16|32|64) (Const(8|16|32|64) [-1]) x) => (Neg(8|16|32|64) x)
// Convert -x * c to x * -c
(Mul(8|16|32|64) (Const(8|16|32|64) <t> [c]) (Neg(8|16|32|64) x)) => (Mul(8|16|32|64) x (Const(8|16|32|64) <t> [-c]))
(Mul(8|16|32|64) (Neg(8|16|32|64) x) (Neg(8|16|32|64) y)) => (Mul(8|16|32|64) x y)
// DeMorgan's Laws
(And(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (Or(8|16|32|64) <t> x y))
(Or(8|16|32|64) <t> (Com(8|16|32|64) x) (Com(8|16|32|64) y)) => (Com(8|16|32|64) (And(8|16|32|64) <t> x y))
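A quick sanity check of the new negation folds (not part of the commit); both identities hold under Go's wrapping integer arithmetic, even when -x overflows:

package rulesketch

// For any int8 values, including x == math.MinInt8:
//
//	(-x)*c == x*(-c)  and  (-x)*(-y) == x*y
func check(x, y, c int8) (bool, bool) {
	return -x*c == x*-c, (-x)*(-y) == x*y
}
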
@ -337,6 +342,12 @@
(OrB ((Less|Leq)16U (Const16 [c]) x) (Leq16U x (Const16 [d]))) && uint16(c) >= uint16(d+1) && uint16(d+1) > uint16(d) => ((Less|Leq)16U (Const16 <x.Type> [c-d-1]) (Sub16 <x.Type> x (Const16 <x.Type> [d+1])))
(OrB ((Less|Leq)8U (Const8 [c]) x) (Leq8U x (Const8 [d]))) && uint8(c) >= uint8(d+1) && uint8(d+1) > uint8(d) => ((Less|Leq)8U (Const8 <x.Type> [c-d-1]) (Sub8 <x.Type> x (Const8 <x.Type> [d+1])))
// single bit difference: ( x != c && x != d ) -> ( x|(c^d) != c )
(AndB (Neq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Neq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Neq(64|32|16|8) (Or(64|32|16|8) <x.Type> x (Const(64|32|16|8) <x.Type> [c^d])) cv)
// single bit difference: ( x == c || x == d ) -> ( x|(c^d) == c )
(OrB (Eq(64|32|16|8) x cv:(Const(64|32|16|8) [c])) (Eq(64|32|16|8) x (Const(64|32|16|8) [d]))) && c|d == c && oneBit(c^d) => (Eq(64|32|16|8) (Or(64|32|16|8) <x.Type> x (Const(64|32|16|8) <x.Type> [c^d])) cv)
// NaN check: ( x != x || x (>|>=|<|<=) c ) -> ( !(c (>=|>|<=|<) x) )
(OrB (Neq64F x x) ((Less|Leq)64F x y:(Const64F [c]))) => (Not ((Leq|Less)64F y x))
(OrB (Neq64F x x) ((Less|Leq)64F y:(Const64F [c]) x)) => (Not ((Leq|Less)64F x y))
@ -933,8 +944,10 @@
@x.Block (Load <v.Type> (OffPtr <v.Type.PtrTo()> [t.FieldOff(int(i))] ptr) mem)
// Putting struct{*byte} and similar into direct interfaces.
(IMake _typ (StructMake val)) => (IMake _typ val)
(StructSelect [0] (IData x)) => (IData x)
(IMake _typ (StructMake ___)) => imakeOfStructMake(v)
(StructSelect (IData x)) && v.Type.Size() > 0 => (IData x)
(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsStruct() => (StructMake)
(StructSelect (IData x)) && v.Type.Size() == 0 && v.Type.IsArray() => (ArrayMake0)
// un-SSAable values use mem->mem copies
(Store {t} dst (Load src mem) mem) && !CanSSA(t) =>

@ -426,7 +426,14 @@ func (x *expandState) decomposeAsNecessary(pos src.XPos, b *Block, a, m0 *Value,
if a.Op == OpIMake {
data := a.Args[1]
for data.Op == OpStructMake || data.Op == OpArrayMake1 {
data = data.Args[0]
// A struct make might have a few zero-sized fields.
// Use the pointer-y one we know is there.
for _, a := range data.Args {
if a.Type.Size() > 0 {
data = a
break
}
}
}
return x.decomposeAsNecessary(pos, b, data, mem, rc.next(data.Type))
}

@ -10,7 +10,9 @@ import (
)
// fuseEarly runs fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck).
func fuseEarly(f *Func) { fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeNanCheck) }
func fuseEarly(f *Func) {
fuse(f, fuseTypePlain|fuseTypeIntInRange|fuseTypeSingleBitDifference|fuseTypeNanCheck)
}
// fuseLate runs fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect).
func fuseLate(f *Func) { fuse(f, fuseTypePlain|fuseTypeIf|fuseTypeBranchRedirect) }
@ -21,6 +23,7 @@ const (
fuseTypePlain fuseType = 1 << iota
fuseTypeIf
fuseTypeIntInRange
fuseTypeSingleBitDifference
fuseTypeNanCheck
fuseTypeBranchRedirect
fuseTypeShortCircuit
@ -41,6 +44,9 @@ func fuse(f *Func, typ fuseType) {
if typ&fuseTypeIntInRange != 0 {
changed = fuseIntInRange(b) || changed
}
if typ&fuseTypeSingleBitDifference != 0 {
changed = fuseSingleBitDifference(b) || changed
}
if typ&fuseTypeNanCheck != 0 {
changed = fuseNanCheck(b) || changed
}

@ -19,6 +19,14 @@ func fuseNanCheck(b *Block) bool {
return fuseComparisons(b, canOptNanCheck)
}
// fuseSingleBitDifference replaces the short-circuit operators between equality
// checks against constants that differ by only a single bit. For example, it would convert
// `if x == 4 || x == 6 { ... }` into `if (x == 4) | (x == 6) { ... }`. Rewrite rules can
// then optimize these using a bitwise operation, in this case generating `if x|2 == 6 { ... }`.
func fuseSingleBitDifference(b *Block) bool {
return fuseComparisons(b, canOptSingleBitDifference)
}
// fuseComparisons looks for control graphs that match this pattern:
//
// p - predecessor
@ -229,3 +237,40 @@ func canOptNanCheck(x, y *Value, op Op) bool {
}
return false
}
// canOptSingleBitDifference returns true if x op y matches either:
//
// v == c || v == d
// v != c && v != d
//
// Where c and d are constant values that differ by a single bit.
func canOptSingleBitDifference(x, y *Value, op Op) bool {
if x.Op != y.Op {
return false
}
switch x.Op {
case OpEq64, OpEq32, OpEq16, OpEq8:
if op != OpOrB {
return false
}
case OpNeq64, OpNeq32, OpNeq16, OpNeq8:
if op != OpAndB {
return false
}
default:
return false
}
xi := getConstIntArgIndex(x)
if xi < 0 {
return false
}
yi := getConstIntArgIndex(y)
if yi < 0 {
return false
}
if x.Args[xi^1] != y.Args[yi^1] {
return false
}
return oneBit(x.Args[xi].AuxInt ^ y.Args[yi].AuxInt)
}
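A table-style check of the rewrite this enables, for c=6, d=4 (c|d == c, and c^d == 2 is a single bit); not part of the commit:

package main

import "fmt"

func main() {
	for x := 0; x < 16; x++ {
		orig := x == 4 || x == 6
		fused := x|2 == 6 // the bitwise form the rewrite rules produce
		fmt.Println(x, orig, fused, orig == fused) // last column: always true
	}
}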

@ -11481,7 +11481,7 @@ var opcodeTable = [...]opInfo{
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
@ -68770,6 +68770,7 @@ var opcodeTable = [...]opInfo{
inputs: []inputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
clobbers: 2305843009213693952, // F31
clobbersArg0: true,
},
},

@ -466,57 +466,56 @@ func (ft *factsTable) initLimitForNewValue(v *Value) {
// signedMin records the fact that we know v is at least
// min in the signed domain.
func (ft *factsTable) signedMin(v *Value, min int64) bool {
return ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
func (ft *factsTable) signedMin(v *Value, min int64) {
ft.newLimit(v, limit{min: min, max: math.MaxInt64, umin: 0, umax: math.MaxUint64})
}
// signedMax records the fact that we know v is at most
// max in the signed domain.
func (ft *factsTable) signedMax(v *Value, max int64) bool {
return ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
func (ft *factsTable) signedMax(v *Value, max int64) {
ft.newLimit(v, limit{min: math.MinInt64, max: max, umin: 0, umax: math.MaxUint64})
}
func (ft *factsTable) signedMinMax(v *Value, min, max int64) bool {
return ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
func (ft *factsTable) signedMinMax(v *Value, min, max int64) {
ft.newLimit(v, limit{min: min, max: max, umin: 0, umax: math.MaxUint64})
}
// setNonNegative records the fact that v is known to be non-negative.
func (ft *factsTable) setNonNegative(v *Value) bool {
return ft.signedMin(v, 0)
func (ft *factsTable) setNonNegative(v *Value) {
ft.signedMin(v, 0)
}
// unsignedMin records the fact that we know v is at least
// min in the unsigned domain.
func (ft *factsTable) unsignedMin(v *Value, min uint64) bool {
return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
func (ft *factsTable) unsignedMin(v *Value, min uint64) {
ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: math.MaxUint64})
}
// unsignedMax records the fact that we know v is at most
// max in the unsigned domain.
func (ft *factsTable) unsignedMax(v *Value, max uint64) bool {
return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
func (ft *factsTable) unsignedMax(v *Value, max uint64) {
ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: 0, umax: max})
}
func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) bool {
return ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
func (ft *factsTable) unsignedMinMax(v *Value, min, max uint64) {
ft.newLimit(v, limit{min: math.MinInt64, max: math.MaxInt64, umin: min, umax: max})
}
func (ft *factsTable) booleanFalse(v *Value) bool {
return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
func (ft *factsTable) booleanFalse(v *Value) {
ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
}
func (ft *factsTable) booleanTrue(v *Value) bool {
return ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
func (ft *factsTable) booleanTrue(v *Value) {
ft.newLimit(v, limit{min: 1, max: 1, umin: 1, umax: 1})
}
func (ft *factsTable) pointerNil(v *Value) bool {
return ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
func (ft *factsTable) pointerNil(v *Value) {
ft.newLimit(v, limit{min: 0, max: 0, umin: 0, umax: 0})
}
func (ft *factsTable) pointerNonNil(v *Value) bool {
func (ft *factsTable) pointerNonNil(v *Value) {
l := noLimit
l.umin = 1
return ft.newLimit(v, l)
ft.newLimit(v, l)
}
// newLimit adds new limiting information for v.
func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
func (ft *factsTable) newLimit(v *Value, newLim limit) {
oldLim := ft.limits[v.ID]
// Merge old and new information.
@ -531,13 +530,12 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
}
if lim == oldLim {
return false // nothing new to record
return // nothing new to record
}
if lim.unsat() {
r := !ft.unsat
ft.unsat = true
return r
return
}
// Check for recursion. This normally happens because in unsatisfiable
@ -548,7 +546,7 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
// the posets will not notice.
if ft.recurseCheck[v.ID] {
// This should only happen for unsatisfiable cases. TODO: check
return false
return
}
ft.recurseCheck[v.ID] = true
defer func() {
@ -713,8 +711,6 @@ func (ft *factsTable) newLimit(v *Value, newLim limit) bool {
}
}
}
return true
}
func (ft *factsTable) addOrdering(v, w *Value, d domain, r relation) {
@ -1825,7 +1821,7 @@ func initLimit(v *Value) limit {
return lim
}
// flowLimit updates the known limits of v in ft. Returns true if anything changed.
// flowLimit updates the known limits of v in ft.
// flowLimit can use the ranges of input arguments.
//
// Note: this calculation only happens at the point the value is defined. We do not reevaluate
@ -1838,10 +1834,10 @@ func initLimit(v *Value) limit {
// block. We could recompute the range of v once we enter the block so
// we know that it is 0 <= v <= 8, but we don't have a mechanism to do
// that right now.
func (ft *factsTable) flowLimit(v *Value) bool {
func (ft *factsTable) flowLimit(v *Value) {
if !v.Type.IsInteger() {
// TODO: boolean?
return false
return
}
// Additional limits based on opcode and argument.
@ -1851,36 +1847,36 @@ func (ft *factsTable) flowLimit(v *Value) bool {
// extensions
case OpZeroExt8to64, OpZeroExt8to32, OpZeroExt8to16, OpZeroExt16to64, OpZeroExt16to32, OpZeroExt32to64:
a := ft.limits[v.Args[0].ID]
return ft.unsignedMinMax(v, a.umin, a.umax)
ft.unsignedMinMax(v, a.umin, a.umax)
case OpSignExt8to64, OpSignExt8to32, OpSignExt8to16, OpSignExt16to64, OpSignExt16to32, OpSignExt32to64:
a := ft.limits[v.Args[0].ID]
return ft.signedMinMax(v, a.min, a.max)
ft.signedMinMax(v, a.min, a.max)
case OpTrunc64to8, OpTrunc64to16, OpTrunc64to32, OpTrunc32to8, OpTrunc32to16, OpTrunc16to8:
a := ft.limits[v.Args[0].ID]
if a.umax <= 1<<(uint64(v.Type.Size())*8)-1 {
return ft.unsignedMinMax(v, a.umin, a.umax)
ft.unsignedMinMax(v, a.umin, a.umax)
}
// math/bits
case OpCtz64:
a := ft.limits[v.Args[0].ID]
if a.nonzero() {
return ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
ft.unsignedMax(v, uint64(bits.Len64(a.umax)-1))
}
case OpCtz32:
a := ft.limits[v.Args[0].ID]
if a.nonzero() {
return ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
ft.unsignedMax(v, uint64(bits.Len32(uint32(a.umax))-1))
}
case OpCtz16:
a := ft.limits[v.Args[0].ID]
if a.nonzero() {
return ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
ft.unsignedMax(v, uint64(bits.Len16(uint16(a.umax))-1))
}
case OpCtz8:
a := ft.limits[v.Args[0].ID]
if a.nonzero() {
return ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
ft.unsignedMax(v, uint64(bits.Len8(uint8(a.umax))-1))
}
case OpPopCount64, OpPopCount32, OpPopCount16, OpPopCount8:
@ -1889,26 +1885,26 @@ func (ft *factsTable) flowLimit(v *Value) bool {
sharedLeadingMask := ^(uint64(1)<<changingBitsCount - 1)
fixedBits := a.umax & sharedLeadingMask
min := uint64(bits.OnesCount64(fixedBits))
return ft.unsignedMinMax(v, min, min+changingBitsCount)
ft.unsignedMinMax(v, min, min+changingBitsCount)
case OpBitLen64:
a := ft.limits[v.Args[0].ID]
return ft.unsignedMinMax(v,
ft.unsignedMinMax(v,
uint64(bits.Len64(a.umin)),
uint64(bits.Len64(a.umax)))
case OpBitLen32:
a := ft.limits[v.Args[0].ID]
return ft.unsignedMinMax(v,
ft.unsignedMinMax(v,
uint64(bits.Len32(uint32(a.umin))),
uint64(bits.Len32(uint32(a.umax))))
case OpBitLen16:
a := ft.limits[v.Args[0].ID]
return ft.unsignedMinMax(v,
ft.unsignedMinMax(v,
uint64(bits.Len16(uint16(a.umin))),
uint64(bits.Len16(uint16(a.umax))))
case OpBitLen8:
a := ft.limits[v.Args[0].ID]
return ft.unsignedMinMax(v,
ft.unsignedMinMax(v,
uint64(bits.Len8(uint8(a.umin))),
uint64(bits.Len8(uint8(a.umax))))
@ -1921,43 +1917,43 @@ func (ft *factsTable) flowLimit(v *Value) bool {
// AND can only make the value smaller.
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
return ft.unsignedMax(v, min(a.umax, b.umax))
ft.unsignedMax(v, min(a.umax, b.umax))
case OpOr64, OpOr32, OpOr16, OpOr8:
// OR can only make the value bigger and can't flip bits proved to be zero in both inputs.
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
return ft.unsignedMinMax(v,
ft.unsignedMinMax(v,
max(a.umin, b.umin),
1<<bits.Len64(a.umax|b.umax)-1)
case OpXor64, OpXor32, OpXor16, OpXor8:
// XOR can't flip bits that are proved to be zero in both inputs.
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
return ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
ft.unsignedMax(v, 1<<bits.Len64(a.umax|b.umax)-1)
case OpCom64, OpCom32, OpCom16, OpCom8:
a := ft.limits[v.Args[0].ID]
return ft.newLimit(v, a.com(uint(v.Type.Size())*8))
ft.newLimit(v, a.com(uint(v.Type.Size())*8))
// Arithmetic.
case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
return ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
ft.newLimit(v, a.add(b, uint(v.Type.Size())*8))
case OpSub64, OpSub32, OpSub16, OpSub8:
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
sub := ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
mod := ft.detectMod(v)
inferred := ft.detectSliceLenRelation(v)
return sub || mod || inferred
ft.newLimit(v, a.sub(b, uint(v.Type.Size())*8))
ft.detectMod(v)
ft.detectSliceLenRelation(v)
ft.detectSubRelations(v)
case OpNeg64, OpNeg32, OpNeg16, OpNeg8:
a := ft.limits[v.Args[0].ID]
bitsize := uint(v.Type.Size()) * 8
return ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
ft.newLimit(v, a.com(bitsize).add(limit{min: 1, max: 1, umin: 1, umax: 1}, bitsize))
case OpMul64, OpMul32, OpMul16, OpMul8:
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
return ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
ft.newLimit(v, a.mul(b, uint(v.Type.Size())*8))
case OpLsh64x64, OpLsh64x32, OpLsh64x16, OpLsh64x8,
OpLsh32x64, OpLsh32x32, OpLsh32x16, OpLsh32x8,
OpLsh16x64, OpLsh16x32, OpLsh16x16, OpLsh16x8,
@ -1965,7 +1961,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
bitsize := uint(v.Type.Size()) * 8
return ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
ft.newLimit(v, a.mul(b.exp2(bitsize), bitsize))
case OpRsh64x64, OpRsh64x32, OpRsh64x16, OpRsh64x8,
OpRsh32x64, OpRsh32x32, OpRsh32x16, OpRsh32x8,
OpRsh16x64, OpRsh16x32, OpRsh16x16, OpRsh16x8,
@ -1979,7 +1975,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
// Easier to compute min and max of both than to write sign logic.
vmin := min(a.min>>b.min, a.min>>b.max)
vmax := max(a.max>>b.min, a.max>>b.max)
return ft.signedMinMax(v, vmin, vmax)
ft.signedMinMax(v, vmin, vmax)
}
case OpRsh64Ux64, OpRsh64Ux32, OpRsh64Ux16, OpRsh64Ux8,
OpRsh32Ux64, OpRsh32Ux32, OpRsh32Ux16, OpRsh32Ux8,
@ -1988,7 +1984,7 @@ func (ft *factsTable) flowLimit(v *Value) bool {
a := ft.limits[v.Args[0].ID]
b := ft.limits[v.Args[1].ID]
if b.min >= 0 {
return ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
ft.unsignedMinMax(v, a.umin>>b.max, a.umax>>b.min)
}
case OpDiv64, OpDiv32, OpDiv16, OpDiv8:
a := ft.limits[v.Args[0].ID]
@ -2008,11 +2004,11 @@ func (ft *factsTable) flowLimit(v *Value) bool {
if b.umin > 0 {
lim = lim.unsignedMax(a.umax / b.umin)
}
return ft.newLimit(v, lim)
ft.newLimit(v, lim)
case OpMod64, OpMod32, OpMod16, OpMod8:
return ft.modLimit(true, v, v.Args[0], v.Args[1])
ft.modLimit(true, v, v.Args[0], v.Args[1])
case OpMod64u, OpMod32u, OpMod16u, OpMod8u:
return ft.modLimit(false, v, v.Args[0], v.Args[1])
ft.modLimit(false, v, v.Args[0], v.Args[1])
case OpPhi:
// Compute the union of all the input phis.
@ -2032,9 +2028,8 @@ func (ft *factsTable) flowLimit(v *Value) bool {
l.umin = min(l.umin, l2.umin)
l.umax = max(l.umax, l2.umax)
}
return ft.newLimit(v, l)
ft.newLimit(v, l)
}
return false
}
// detectSliceLenRelation matches the pattern where
@ -2047,13 +2042,13 @@ func (ft *factsTable) flowLimit(v *Value) bool {
//
// Note that "index" is not used for indexing in this pattern, but
// in the motivating example (chunked slice iteration) it is.
func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) {
func (ft *factsTable) detectSliceLenRelation(v *Value) {
if v.Op != OpSub64 {
return false
return
}
if !(v.Args[0].Op == OpSliceLen || v.Args[0].Op == OpSliceCap) {
return false
return
}
slice := v.Args[0].Args[0]
@ -2093,13 +2088,54 @@ func (ft *factsTable) detectSliceLenRelation(v *Value) (inferred bool) {
if K < 0 { // We hate thinking about overflow
continue
}
inferred = inferred || ft.signedMin(v, K)
ft.signedMin(v, K)
}
}
// v must be Sub{64,32,16,8}.
func (ft *factsTable) detectSubRelations(v *Value) {
// v = x-y
x := v.Args[0]
y := v.Args[1]
if x == y {
ft.signedMinMax(v, 0, 0)
return
}
xLim := ft.limits[x.ID]
yLim := ft.limits[y.ID]
// Check if we might wrap around. If so, give up.
width := uint(v.Type.Size()) * 8
if _, ok := safeSub(xLim.min, yLim.max, width); !ok {
return // x-y might underflow
}
if _, ok := safeSub(xLim.max, yLim.min, width); !ok {
return // x-y might overflow
}
// Subtracting a positive number only makes
// things smaller.
if yLim.min >= 0 {
ft.update(v.Block, v, x, signed, lt|eq)
// TODO: is this worth it?
//if yLim.min > 0 {
// ft.update(v.Block, v, x, signed, lt)
//}
}
// Subtracting a number from a bigger one
// can't go below 0.
if ft.orderS.OrderedOrEqual(y, x) {
ft.setNonNegative(v)
// TODO: is this worth it?
//if ft.orderS.Ordered(y, x) {
// ft.signedMin(v, 1)
//}
}
return inferred
}
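A hedged sketch of the kind of source pattern these subtraction facts help the prover with (hypothetical example, not from the CL): once j <= i is established, i-j is known non-negative and at most i, which can discharge a bounds check.

package main

// suffixByte indexes with i-j; with j >= 0, j <= i, and i < len(s) all
// established on the path, the prover can conclude 0 <= i-j < len(s).
func suffixByte(s []byte, i, j int) byte {
	if j < 0 || j > i || i >= len(s) {
		return 0
	}
	return s[i-j]
}

func main() {
	_ = suffixByte([]byte("hello"), 3, 1)
}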
// x%d has been rewritten to x - (x/d)*d.
func (ft *factsTable) detectMod(v *Value) bool {
func (ft *factsTable) detectMod(v *Value) {
var opDiv, opDivU, opMul, opConst Op
switch v.Op {
case OpSub64:
@ -2126,36 +2162,37 @@ func (ft *factsTable) detectMod(v *Value) bool {
mul := v.Args[1]
if mul.Op != opMul {
return false
return
}
div, con := mul.Args[0], mul.Args[1]
if div.Op == opConst {
div, con = con, div
}
if con.Op != opConst || (div.Op != opDiv && div.Op != opDivU) || div.Args[0] != v.Args[0] || div.Args[1].Op != opConst || div.Args[1].AuxInt != con.AuxInt {
return false
return
}
return ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
ft.modLimit(div.Op == opDiv, v, v.Args[0], con)
}
// modLimit sets v with facts derived from v = p % q.
func (ft *factsTable) modLimit(signed bool, v, p, q *Value) bool {
func (ft *factsTable) modLimit(signed bool, v, p, q *Value) {
a := ft.limits[p.ID]
b := ft.limits[q.ID]
if signed {
if a.min < 0 && b.min > 0 {
return ft.signedMinMax(v, -(b.max - 1), b.max-1)
ft.signedMinMax(v, -(b.max - 1), b.max-1)
return
}
if !(a.nonnegative() && b.nonnegative()) {
// TODO: we could handle signed limits but I didn't bother.
return false
return
}
if a.min >= 0 && b.min > 0 {
ft.setNonNegative(v)
}
}
// Underflow in the arithmetic below is ok, it gives to MaxUint64 which does nothing to the limit.
return ft.unsignedMax(v, min(a.umax, b.umax-1))
ft.unsignedMax(v, min(a.umax, b.umax-1))
}
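A small check of the bound modLimit records, shown as a standalone sketch: for v = p % q with q > 0, Go's truncated division guarantees -(q-1) <= v <= q-1, and v >= 0 whenever p >= 0.

package main

import "fmt"

func main() {
	const q = 4
	for p := -9; p <= 9; p++ {
		v := p % q
		if v <= -q || v >= q {
			fmt.Println("magnitude bound violated at p =", p)
		}
		if p >= 0 && v < 0 {
			fmt.Println("sign bound violated at p =", p)
		}
	}
	fmt.Println("p % q stays within [-(q-1), q-1]")
}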
// getBranch returns the range restrictions added by p
@ -2466,10 +2503,7 @@ func addLocalFacts(ft *factsTable, b *Block) {
xl := ft.limits[x.ID]
y := add.Args[1]
yl := ft.limits[y.ID]
if unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
continue
}
if !unsignedAddOverflows(xl.umax, yl.umax, add.Type) {
if xl.umax < uminDivisor {
ft.update(b, v, y, unsigned, lt|eq)
}
@ -2477,6 +2511,7 @@ func addLocalFacts(ft *factsTable, b *Block) {
ft.update(b, v, x, unsigned, lt|eq)
}
}
}
ft.update(b, v, v.Args[0], unsigned, lt|eq)
case OpMod64, OpMod32, OpMod16, OpMod8:
if !ft.isNonNegative(v.Args[0]) || !ft.isNonNegative(v.Args[1]) {
@ -2993,7 +3028,6 @@ func (ft *factsTable) topoSortValuesInBlock(b *Block) {
want := f.NumValues()
scores := ft.reusedTopoSortScoresTable
if len(scores) < want {
if want <= cap(scores) {
scores = scores[:want]
} else {
@ -3003,7 +3037,6 @@ func (ft *factsTable) topoSortValuesInBlock(b *Block) {
scores = f.Cache.allocUintSlice(want)
ft.reusedTopoSortScoresTable = scores
}
}
for _, v := range b.Values {
scores[v.ID] = 0 // sentinel

View file

@ -596,17 +596,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
var c *Value
if vi.regs != 0 {
// Copy from a register that v is already in.
r2 := pickReg(vi.regs)
var current *Value
if !s.allocatable.contains(r2) {
current = v // v is in a fixed register
if vi.regs&^s.allocatable != 0 {
// v is in a fixed register, prefer that
current = v
} else {
r2 := pickReg(vi.regs)
if s.regs[r2].v != v {
panic("bad register state")
}
current = s.regs[r2].c
}
s.usedSinceBlockStart |= regMask(1) << r2
}
c = s.curBlock.NewValue1(pos, OpCopy, v.Type, current)
} else if v.rematerializeable() {
// Rematerialize instead of loading from the spill location.

View file

@ -2772,3 +2772,17 @@ func panicBoundsCCToAux(p PanicBoundsCC) Aux {
func isDictArgSym(sym Sym) bool {
return sym.(*ir.Name).Sym().Name == typecheck.LocalDictName
}
// When v is (IMake typ (StructMake ...)), convert to
// (IMake typ arg) where arg is the pointer-y argument to
// the StructMake (there must be exactly one).
func imakeOfStructMake(v *Value) *Value {
var arg *Value
for _, a := range v.Args[1].Args {
if a.Type.Size() > 0 {
arg = a
break
}
}
return v.Block.NewValue2(v.Pos, OpIMake, v.Type, v.Args[0], arg)
}
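A hedged sketch of a source shape that can produce (IMake typ (StructMake ...)): a struct whose only non-zero-size field is pointer-shaped, assuming such structs qualify as direct interface types so the interface's data word holds the pointer itself. The type name here is illustrative.

package main

// box has exactly one field with Size() > 0, and that field is a pointer,
// matching the "exactly one pointer-y argument" invariant in the helper.
type box struct {
	_ [0]uint64 // zero-size field
	p *int      // the single pointer-shaped field
}

func toIface(p *int) any { return box{p: p} }

func main() {
	x := 1
	_ = toIface(&x)
}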

View file

@ -12556,6 +12556,54 @@ func rewriteValueARM64_OpARM64MUL(v *Value) bool {
}
break
}
// match: (MUL r:(MOVWUreg x) s:(MOVWUreg y))
// cond: r.Uses == 1 && s.Uses == 1
// result: (UMULL x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
r := v_0
if r.Op != OpARM64MOVWUreg {
continue
}
x := r.Args[0]
s := v_1
if s.Op != OpARM64MOVWUreg {
continue
}
y := s.Args[0]
if !(r.Uses == 1 && s.Uses == 1) {
continue
}
v.reset(OpARM64UMULL)
v.AddArg2(x, y)
return true
}
break
}
// match: (MUL r:(MOVWreg x) s:(MOVWreg y))
// cond: r.Uses == 1 && s.Uses == 1
// result: (MULL x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
r := v_0
if r.Op != OpARM64MOVWreg {
continue
}
x := r.Args[0]
s := v_1
if s.Op != OpARM64MOVWreg {
continue
}
y := s.Args[0]
if !(r.Uses == 1 && s.Uses == 1) {
continue
}
v.reset(OpARM64MULL)
v.AddArg2(x, y)
return true
}
break
}
return false
}
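A sketch of the Go source the two new MUL rules target on arm64: a 64-bit product of extended 32-bit operands, which can lower to a single UMULL (unsigned) or MULL (signed) instead of separate extensions plus a full 64-bit multiply.

package main

//go:noinline
func mulU(x, y uint32) uint64 { return uint64(x) * uint64(y) } // candidate for UMULL

//go:noinline
func mulS(x, y int32) int64 { return int64(x) * int64(y) } // candidate for MULL

func main() {
	_ = mulU(3, 5)
	_ = mulS(-3, 5)
}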
func rewriteValueARM64_OpARM64MULW(v *Value) bool {
@ -25273,6 +25321,37 @@ func rewriteBlockARM64(b *Block) bool {
b.resetWithControl(BlockARM64FGE, cc)
return true
}
// match: (TBNZ [0] (XORconst [1] x) yes no)
// result: (TBZ [0] x yes no)
for b.Controls[0].Op == OpARM64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
if auxIntToInt64(b.AuxInt) != 0 {
break
}
b.resetWithControl(BlockARM64TBZ, x)
b.AuxInt = int64ToAuxInt(0)
return true
}
case BlockARM64TBZ:
// match: (TBZ [0] (XORconst [1] x) yes no)
// result: (TBNZ [0] x yes no)
for b.Controls[0].Op == OpARM64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
if auxIntToInt64(b.AuxInt) != 0 {
break
}
b.resetWithControl(BlockARM64TBNZ, x)
b.AuxInt = int64ToAuxInt(0)
return true
}
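A sketch of the branch shape the TBNZ/TBZ rules clean up: negating a boolean compiles to XORconst [1], and the rules invert the branch sense instead of materializing the NOT.

package main

//go:noinline
func pick(b bool) int {
	if !b { // TBNZ [0] (XORconst [1] b) becomes TBZ [0] b
		return 1
	}
	return 2
}

func main() { _ = pick(true) }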
case BlockARM64UGE:
// match: (UGE (FlagConstant [fc]) yes no)
// cond: fc.uge()

View file

@ -5866,7 +5866,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (MULV _ (MOVVconst [0]))
// result: (MOVVconst [0])
for {
@ -5911,44 +5910,6 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
}
break
}
// match: (MULV (NEGV x) (MOVVconst [c]))
// result: (MULV x (MOVVconst [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpLOONG64NEGV {
continue
}
x := v_0.Args[0]
if v_1.Op != OpLOONG64MOVVconst {
continue
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpLOONG64MULV)
v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (MULV (NEGV x) (NEGV y))
// result: (MULV x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpLOONG64NEGV {
continue
}
x := v_0.Args[0]
if v_1.Op != OpLOONG64NEGV {
continue
}
y := v_1.Args[0]
v.reset(OpLOONG64MULV)
v.AddArg2(x, y)
return true
}
break
}
// match: (MULV (MOVVconst [c]) (MOVVconst [d]))
// result: (MOVVconst [c*d])
for {

View file

@ -7027,7 +7027,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ROL x (MOVDconst [val]))
// result: (RORI [int64(int8(-val)&63)] x)
// result: (RORI [-val&63] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7035,7 +7035,7 @@ func rewriteValueRISCV64_OpRISCV64ROL(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64RORI)
v.AuxInt = int64ToAuxInt(int64(int8(-val) & 63))
v.AuxInt = int64ToAuxInt(-val & 63)
v.AddArg(x)
return true
}
@ -7057,7 +7057,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ROLW x (MOVDconst [val]))
// result: (RORIW [int64(int8(-val)&31)] x)
// result: (RORIW [-val&31] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7065,7 +7065,7 @@ func rewriteValueRISCV64_OpRISCV64ROLW(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64RORIW)
v.AuxInt = int64ToAuxInt(int64(int8(-val) & 31))
v.AuxInt = int64ToAuxInt(-val & 31)
v.AddArg(x)
return true
}
@ -7087,7 +7087,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (ROR x (MOVDconst [val]))
// result: (RORI [int64(val&63)] x)
// result: (RORI [val&63] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7095,7 +7095,7 @@ func rewriteValueRISCV64_OpRISCV64ROR(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64RORI)
v.AuxInt = int64ToAuxInt(int64(val & 63))
v.AuxInt = int64ToAuxInt(val & 63)
v.AddArg(x)
return true
}
@ -7105,7 +7105,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (RORW x (MOVDconst [val]))
// result: (RORIW [int64(val&31)] x)
// result: (RORIW [val&31] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7113,7 +7113,7 @@ func rewriteValueRISCV64_OpRISCV64RORW(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64RORIW)
v.AuxInt = int64ToAuxInt(int64(val & 31))
v.AuxInt = int64ToAuxInt(val & 31)
v.AddArg(x)
return true
}
@ -7212,7 +7212,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SLL x (MOVDconst [val]))
// result: (SLLI [int64(val&63)] x)
// result: (SLLI [val&63] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7220,7 +7220,7 @@ func rewriteValueRISCV64_OpRISCV64SLL(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SLLI)
v.AuxInt = int64ToAuxInt(int64(val & 63))
v.AuxInt = int64ToAuxInt(val & 63)
v.AddArg(x)
return true
}
@ -7246,7 +7246,7 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool {
}
// match: (SLLI <t> [c] (ADD x x))
// cond: c < t.Size() * 8 - 1
// result: (SLLI <t> [c+1] x)
// result: (SLLI [c+1] x)
for {
t := v.Type
c := auxIntToInt64(v.AuxInt)
@ -7258,7 +7258,6 @@ func rewriteValueRISCV64_OpRISCV64SLLI(v *Value) bool {
break
}
v.reset(OpRISCV64SLLI)
v.Type = t
v.AuxInt = int64ToAuxInt(c + 1)
v.AddArg(x)
return true
@ -7286,7 +7285,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SLLW x (MOVDconst [val]))
// result: (SLLIW [int64(val&31)] x)
// result: (SLLIW [val&31] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7294,7 +7293,7 @@ func rewriteValueRISCV64_OpRISCV64SLLW(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SLLIW)
v.AuxInt = int64ToAuxInt(int64(val & 31))
v.AuxInt = int64ToAuxInt(val & 31)
v.AddArg(x)
return true
}
@ -7304,7 +7303,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SLT x (MOVDconst [val]))
// cond: val >= -2048 && val <= 2047
// cond: is12Bit(val)
// result: (SLTI [val] x)
for {
x := v_0
@ -7312,7 +7311,7 @@ func rewriteValueRISCV64_OpRISCV64SLT(v *Value) bool {
break
}
val := auxIntToInt64(v_1.AuxInt)
if !(val >= -2048 && val <= 2047) {
if !(is12Bit(val)) {
break
}
v.reset(OpRISCV64SLTI)
@ -7363,22 +7362,6 @@ func rewriteValueRISCV64_OpRISCV64SLTI(v *Value) bool {
v.AuxInt = int64ToAuxInt(1)
return true
}
// match: (SLTI [x] (ORI [y] _))
// cond: y >= 0 && int64(y) >= int64(x)
// result: (MOVDconst [0])
for {
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64ORI {
break
}
y := auxIntToInt64(v_0.AuxInt)
if !(y >= 0 && int64(y) >= int64(x)) {
break
}
v.reset(OpRISCV64MOVDconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64SLTIU(v *Value) bool {
@ -7433,7 +7416,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SLTU x (MOVDconst [val]))
// cond: val >= -2048 && val <= 2047
// cond: is12Bit(val)
// result: (SLTIU [val] x)
for {
x := v_0
@ -7441,7 +7424,7 @@ func rewriteValueRISCV64_OpRISCV64SLTU(v *Value) bool {
break
}
val := auxIntToInt64(v_1.AuxInt)
if !(val >= -2048 && val <= 2047) {
if !(is12Bit(val)) {
break
}
v.reset(OpRISCV64SLTIU)
@ -7555,7 +7538,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SRA x (MOVDconst [val]))
// result: (SRAI [int64(val&63)] x)
// result: (SRAI [val&63] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7563,7 +7546,7 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SRAI)
v.AuxInt = int64ToAuxInt(int64(val & 63))
v.AuxInt = int64ToAuxInt(val & 63)
v.AddArg(x)
return true
}
@ -7572,11 +7555,10 @@ func rewriteValueRISCV64_OpRISCV64SRA(v *Value) bool {
func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
// match: (SRAI <t> [x] (MOVWreg y))
// match: (SRAI [x] (MOVWreg y))
// cond: x >= 0 && x <= 31
// result: (SRAIW <t> [int64(x)] y)
// result: (SRAIW [x] y)
for {
t := v.Type
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVWreg {
break
@ -7586,8 +7568,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
break
}
v.reset(OpRISCV64SRAIW)
v.Type = t
v.AuxInt = int64ToAuxInt(int64(x))
v.AuxInt = int64ToAuxInt(x)
v.AddArg(y)
return true
}
@ -7633,7 +7614,7 @@ func rewriteValueRISCV64_OpRISCV64SRAI(v *Value) bool {
v.AddArg(v0)
return true
}
// match: (SRAI <t> [x] (MOVWreg y))
// match: (SRAI [x] (MOVWreg y))
// cond: x >= 32
// result: (SRAIW [31] y)
for {
@ -7668,7 +7649,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SRAW x (MOVDconst [val]))
// result: (SRAIW [int64(val&31)] x)
// result: (SRAIW [val&31] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7676,7 +7657,7 @@ func rewriteValueRISCV64_OpRISCV64SRAW(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SRAIW)
v.AuxInt = int64ToAuxInt(int64(val & 31))
v.AuxInt = int64ToAuxInt(val & 31)
v.AddArg(x)
return true
}
@ -7686,7 +7667,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SRL x (MOVDconst [val]))
// result: (SRLI [int64(val&63)] x)
// result: (SRLI [val&63] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7694,7 +7675,7 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SRLI)
v.AuxInt = int64ToAuxInt(int64(val & 63))
v.AuxInt = int64ToAuxInt(val & 63)
v.AddArg(x)
return true
}
@ -7702,11 +7683,10 @@ func rewriteValueRISCV64_OpRISCV64SRL(v *Value) bool {
}
func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
v_0 := v.Args[0]
// match: (SRLI <t> [x] (MOVWUreg y))
// match: (SRLI [x] (MOVWUreg y))
// cond: x >= 0 && x <= 31
// result: (SRLIW <t> [int64(x)] y)
// result: (SRLIW [x] y)
for {
t := v.Type
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVWUreg {
break
@ -7716,16 +7696,14 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
break
}
v.reset(OpRISCV64SRLIW)
v.Type = t
v.AuxInt = int64ToAuxInt(int64(x))
v.AuxInt = int64ToAuxInt(x)
v.AddArg(y)
return true
}
// match: (SRLI <t> [x] (MOVBUreg y))
// match: (SRLI [x] (MOVBUreg y))
// cond: x >= 8
// result: (MOVDconst <t> [0])
// result: (MOVDconst [0])
for {
t := v.Type
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVBUreg {
break
@ -7734,15 +7712,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
break
}
v.reset(OpRISCV64MOVDconst)
v.Type = t
v.AuxInt = int64ToAuxInt(0)
return true
}
// match: (SRLI <t> [x] (MOVHUreg y))
// match: (SRLI [x] (MOVHUreg y))
// cond: x >= 16
// result: (MOVDconst <t> [0])
// result: (MOVDconst [0])
for {
t := v.Type
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVHUreg {
break
@ -7751,15 +7727,13 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
break
}
v.reset(OpRISCV64MOVDconst)
v.Type = t
v.AuxInt = int64ToAuxInt(0)
return true
}
// match: (SRLI <t> [x] (MOVWUreg y))
// match: (SRLI [x] (MOVWUreg y))
// cond: x >= 32
// result: (MOVDconst <t> [0])
// result: (MOVDconst [0])
for {
t := v.Type
x := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVWUreg {
break
@ -7768,7 +7742,6 @@ func rewriteValueRISCV64_OpRISCV64SRLI(v *Value) bool {
break
}
v.reset(OpRISCV64MOVDconst)
v.Type = t
v.AuxInt = int64ToAuxInt(0)
return true
}
@ -7790,7 +7763,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (SRLW x (MOVDconst [val]))
// result: (SRLIW [int64(val&31)] x)
// result: (SRLIW [val&31] x)
for {
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
@ -7798,7 +7771,7 @@ func rewriteValueRISCV64_OpRISCV64SRLW(v *Value) bool {
}
val := auxIntToInt64(v_1.AuxInt)
v.reset(OpRISCV64SRLIW)
v.AuxInt = int64ToAuxInt(int64(val & 31))
v.AuxInt = int64ToAuxInt(val & 31)
v.AddArg(x)
return true
}

View file

@ -279,11 +279,20 @@ func rewriteValuedec_OpIData(v *Value) bool {
func rewriteValuedec_OpIMake(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (IMake _typ (StructMake val))
// match: (IMake _typ (StructMake ___))
// result: imakeOfStructMake(v)
for {
if v_1.Op != OpStructMake {
break
}
v.copyOf(imakeOfStructMake(v))
return true
}
// match: (IMake _typ (ArrayMake1 val))
// result: (IMake _typ val)
for {
_typ := v_0
if v_1.Op != OpStructMake || len(v_1.Args) != 1 {
if v_1.Op != OpArrayMake1 {
break
}
val := v_1.Args[0]
@ -839,17 +848,47 @@ func rewriteValuedec_OpStructMake(v *Value) bool {
func rewriteValuedec_OpStructSelect(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
// match: (StructSelect [0] (IData x))
// match: (StructSelect (IData x))
// cond: v.Type.Size() > 0
// result: (IData x)
for {
if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
if v_0.Op != OpIData {
break
}
x := v_0.Args[0]
if !(v.Type.Size() > 0) {
break
}
v.reset(OpIData)
v.AddArg(x)
return true
}
// match: (StructSelect (IData x))
// cond: v.Type.Size() == 0 && v.Type.IsStruct()
// result: (StructMake)
for {
if v_0.Op != OpIData {
break
}
if !(v.Type.Size() == 0 && v.Type.IsStruct()) {
break
}
v.reset(OpStructMake)
return true
}
// match: (StructSelect (IData x))
// cond: v.Type.Size() == 0 && v.Type.IsArray()
// result: (ArrayMake0)
for {
if v_0.Op != OpIData {
break
}
if !(v.Type.Size() == 0 && v.Type.IsArray()) {
break
}
v.reset(OpArrayMake0)
return true
}
// match: (StructSelect [i] x:(StructMake ___))
// result: x.Args[i]
for {
@ -861,13 +900,10 @@ func rewriteValuedec_OpStructSelect(v *Value) bool {
v.copyOf(x.Args[i])
return true
}
// match: (StructSelect [0] x)
// match: (StructSelect x)
// cond: x.Type.IsPtrShaped()
// result: x
for {
if auxIntToInt64(v.AuxInt) != 0 {
break
}
x := v_0
if !(x.Type.IsPtrShaped()) {
break

View file

@ -5332,6 +5332,182 @@ func rewriteValuegeneric_OpAndB(v *Value) bool {
}
break
}
// match: (AndB (Neq64 x cv:(Const64 [c])) (Neq64 x (Const64 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Neq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeq64 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst64 {
continue
}
c := auxIntToInt64(cv.AuxInt)
if v_1.Op != OpNeq64 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst64 {
continue
}
d := auxIntToInt64(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpNeq64)
v0 := b.NewValue0(v.Pos, OpOr64, x.Type)
v1 := b.NewValue0(v.Pos, OpConst64, x.Type)
v1.AuxInt = int64ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (AndB (Neq32 x cv:(Const32 [c])) (Neq32 x (Const32 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Neq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeq32 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst32 {
continue
}
c := auxIntToInt32(cv.AuxInt)
if v_1.Op != OpNeq32 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst32 {
continue
}
d := auxIntToInt32(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpNeq32)
v0 := b.NewValue0(v.Pos, OpOr32, x.Type)
v1 := b.NewValue0(v.Pos, OpConst32, x.Type)
v1.AuxInt = int32ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (AndB (Neq16 x cv:(Const16 [c])) (Neq16 x (Const16 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Neq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeq16 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst16 {
continue
}
c := auxIntToInt16(cv.AuxInt)
if v_1.Op != OpNeq16 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst16 {
continue
}
d := auxIntToInt16(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpNeq16)
v0 := b.NewValue0(v.Pos, OpOr16, x.Type)
v1 := b.NewValue0(v.Pos, OpConst16, x.Type)
v1.AuxInt = int16ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (AndB (Neq8 x cv:(Const8 [c])) (Neq8 x (Const8 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Neq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeq8 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst8 {
continue
}
c := auxIntToInt8(cv.AuxInt)
if v_1.Op != OpNeq8 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst8 {
continue
}
d := auxIntToInt8(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpNeq8)
v0 := b.NewValue0(v.Pos, OpOr8, x.Type)
v1 := b.NewValue0(v.Pos, OpConst8, x.Type)
v1.AuxInt = int8ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
return false
}
func rewriteValuegeneric_OpArraySelect(v *Value) bool {
@ -8809,16 +8985,13 @@ func rewriteValuegeneric_OpFloor(v *Value) bool {
func rewriteValuegeneric_OpIMake(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (IMake _typ (StructMake val))
// result: (IMake _typ val)
// match: (IMake _typ (StructMake ___))
// result: imakeOfStructMake(v)
for {
_typ := v_0
if v_1.Op != OpStructMake || len(v_1.Args) != 1 {
if v_1.Op != OpStructMake {
break
}
val := v_1.Args[0]
v.reset(OpIMake)
v.AddArg2(_typ, val)
v.copyOf(imakeOfStructMake(v))
return true
}
// match: (IMake _typ (ArrayMake1 val))
@ -16610,6 +16783,45 @@ func rewriteValuegeneric_OpMul16(v *Value) bool {
}
break
}
// match: (Mul16 (Const16 <t> [c]) (Neg16 x))
// result: (Mul16 x (Const16 <t> [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst16 {
continue
}
t := v_0.Type
c := auxIntToInt16(v_0.AuxInt)
if v_1.Op != OpNeg16 {
continue
}
x := v_1.Args[0]
v.reset(OpMul16)
v0 := b.NewValue0(v.Pos, OpConst16, t)
v0.AuxInt = int16ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (Mul16 (Neg16 x) (Neg16 y))
// result: (Mul16 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeg16 {
continue
}
x := v_0.Args[0]
if v_1.Op != OpNeg16 {
continue
}
y := v_1.Args[0]
v.reset(OpMul16)
v.AddArg2(x, y)
return true
}
break
}
// match: (Mul16 (Const16 <t> [c]) (Add16 <t> (Const16 <t> [d]) x))
// cond: !isPowerOfTwo(c)
// result: (Add16 (Const16 <t> [c*d]) (Mul16 <t> (Const16 <t> [c]) x))
@ -16821,6 +17033,45 @@ func rewriteValuegeneric_OpMul32(v *Value) bool {
}
break
}
// match: (Mul32 (Const32 <t> [c]) (Neg32 x))
// result: (Mul32 x (Const32 <t> [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst32 {
continue
}
t := v_0.Type
c := auxIntToInt32(v_0.AuxInt)
if v_1.Op != OpNeg32 {
continue
}
x := v_1.Args[0]
v.reset(OpMul32)
v0 := b.NewValue0(v.Pos, OpConst32, t)
v0.AuxInt = int32ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (Mul32 (Neg32 x) (Neg32 y))
// result: (Mul32 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeg32 {
continue
}
x := v_0.Args[0]
if v_1.Op != OpNeg32 {
continue
}
y := v_1.Args[0]
v.reset(OpMul32)
v.AddArg2(x, y)
return true
}
break
}
// match: (Mul32 (Const32 <t> [c]) (Add32 <t> (Const32 <t> [d]) x))
// cond: !isPowerOfTwo(c)
// result: (Add32 (Const32 <t> [c*d]) (Mul32 <t> (Const32 <t> [c]) x))
@ -17193,6 +17444,45 @@ func rewriteValuegeneric_OpMul64(v *Value) bool {
}
break
}
// match: (Mul64 (Const64 <t> [c]) (Neg64 x))
// result: (Mul64 x (Const64 <t> [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst64 {
continue
}
t := v_0.Type
c := auxIntToInt64(v_0.AuxInt)
if v_1.Op != OpNeg64 {
continue
}
x := v_1.Args[0]
v.reset(OpMul64)
v0 := b.NewValue0(v.Pos, OpConst64, t)
v0.AuxInt = int64ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (Mul64 (Neg64 x) (Neg64 y))
// result: (Mul64 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeg64 {
continue
}
x := v_0.Args[0]
if v_1.Op != OpNeg64 {
continue
}
y := v_1.Args[0]
v.reset(OpMul64)
v.AddArg2(x, y)
return true
}
break
}
// match: (Mul64 (Const64 <t> [c]) (Add64 <t> (Const64 <t> [d]) x))
// cond: !isPowerOfTwo(c)
// result: (Add64 (Const64 <t> [c*d]) (Mul64 <t> (Const64 <t> [c]) x))
@ -17565,6 +17855,45 @@ func rewriteValuegeneric_OpMul8(v *Value) bool {
}
break
}
// match: (Mul8 (Const8 <t> [c]) (Neg8 x))
// result: (Mul8 x (Const8 <t> [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst8 {
continue
}
t := v_0.Type
c := auxIntToInt8(v_0.AuxInt)
if v_1.Op != OpNeg8 {
continue
}
x := v_1.Args[0]
v.reset(OpMul8)
v0 := b.NewValue0(v.Pos, OpConst8, t)
v0.AuxInt = int8ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (Mul8 (Neg8 x) (Neg8 y))
// result: (Mul8 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpNeg8 {
continue
}
x := v_0.Args[0]
if v_1.Op != OpNeg8 {
continue
}
y := v_1.Args[0]
v.reset(OpMul8)
v.AddArg2(x, y)
return true
}
break
}
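The algebraic identities behind the Mul/Neg rules, checked in a standalone sketch; both hold in two's-complement (wraparound) arithmetic, so the rewrite is safe even when the multiply overflows.

package main

import "fmt"

func main() {
	x, y := int16(-1234), int16(77)
	fmt.Println((-x)*(-y) == x*y) // Mul (Neg x) (Neg y) => Mul x y
	fmt.Println(3*(-x) == -3*x)   // Mul c (Neg x) => Mul (-c) x
}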
// match: (Mul8 (Const8 <t> [c]) (Add8 <t> (Const8 <t> [d]) x))
// cond: !isPowerOfTwo(c)
// result: (Add8 (Const8 <t> [c*d]) (Mul8 <t> (Const8 <t> [c]) x))
@ -23242,6 +23571,182 @@ func rewriteValuegeneric_OpOrB(v *Value) bool {
}
break
}
// match: (OrB (Eq64 x cv:(Const64 [c])) (Eq64 x (Const64 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Eq64 (Or64 <x.Type> x (Const64 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpEq64 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst64 {
continue
}
c := auxIntToInt64(cv.AuxInt)
if v_1.Op != OpEq64 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst64 {
continue
}
d := auxIntToInt64(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpEq64)
v0 := b.NewValue0(v.Pos, OpOr64, x.Type)
v1 := b.NewValue0(v.Pos, OpConst64, x.Type)
v1.AuxInt = int64ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (OrB (Eq32 x cv:(Const32 [c])) (Eq32 x (Const32 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Eq32 (Or32 <x.Type> x (Const32 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpEq32 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst32 {
continue
}
c := auxIntToInt32(cv.AuxInt)
if v_1.Op != OpEq32 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst32 {
continue
}
d := auxIntToInt32(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpEq32)
v0 := b.NewValue0(v.Pos, OpOr32, x.Type)
v1 := b.NewValue0(v.Pos, OpConst32, x.Type)
v1.AuxInt = int32ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (OrB (Eq16 x cv:(Const16 [c])) (Eq16 x (Const16 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Eq16 (Or16 <x.Type> x (Const16 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpEq16 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst16 {
continue
}
c := auxIntToInt16(cv.AuxInt)
if v_1.Op != OpEq16 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst16 {
continue
}
d := auxIntToInt16(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpEq16)
v0 := b.NewValue0(v.Pos, OpOr16, x.Type)
v1 := b.NewValue0(v.Pos, OpConst16, x.Type)
v1.AuxInt = int16ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (OrB (Eq8 x cv:(Const8 [c])) (Eq8 x (Const8 [d])))
// cond: c|d == c && oneBit(c^d)
// result: (Eq8 (Or8 <x.Type> x (Const8 <x.Type> [c^d])) cv)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpEq8 {
continue
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_0_0, v_0_1 = _i1+1, v_0_1, v_0_0 {
x := v_0_0
cv := v_0_1
if cv.Op != OpConst8 {
continue
}
c := auxIntToInt8(cv.AuxInt)
if v_1.Op != OpEq8 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i2 := 0; _i2 <= 1; _i2, v_1_0, v_1_1 = _i2+1, v_1_1, v_1_0 {
if x != v_1_0 || v_1_1.Op != OpConst8 {
continue
}
d := auxIntToInt8(v_1_1.AuxInt)
if !(c|d == c && oneBit(c^d)) {
continue
}
v.reset(OpEq8)
v0 := b.NewValue0(v.Pos, OpOr8, x.Type)
v1 := b.NewValue0(v.Pos, OpConst8, x.Type)
v1.AuxInt = int8ToAuxInt(c ^ d)
v0.AddArg2(x, v1)
v.AddArg2(v0, cv)
return true
}
}
}
break
}
// match: (OrB (Neq64F x x) (Less64F x y:(Const64F [c])))
// result: (Not (Leq64F y x))
for {
@ -31601,17 +32106,47 @@ func rewriteValuegeneric_OpStructSelect(v *Value) bool {
v0.AddArg2(v1, mem)
return true
}
// match: (StructSelect [0] (IData x))
// match: (StructSelect (IData x))
// cond: v.Type.Size() > 0
// result: (IData x)
for {
if auxIntToInt64(v.AuxInt) != 0 || v_0.Op != OpIData {
if v_0.Op != OpIData {
break
}
x := v_0.Args[0]
if !(v.Type.Size() > 0) {
break
}
v.reset(OpIData)
v.AddArg(x)
return true
}
// match: (StructSelect (IData x))
// cond: v.Type.Size() == 0 && v.Type.IsStruct()
// result: (StructMake)
for {
if v_0.Op != OpIData {
break
}
if !(v.Type.Size() == 0 && v.Type.IsStruct()) {
break
}
v.reset(OpStructMake)
return true
}
// match: (StructSelect (IData x))
// cond: v.Type.Size() == 0 && v.Type.IsArray()
// result: (ArrayMake0)
for {
if v_0.Op != OpIData {
break
}
if !(v.Type.Size() == 0 && v.Type.IsArray()) {
break
}
v.reset(OpArrayMake0)
return true
}
return false
}
func rewriteValuegeneric_OpSub16(v *Value) bool {

View file

@ -124,6 +124,11 @@ func InitConfig() {
ir.Syms.GCWriteBarrier[7] = typecheck.LookupRuntimeFunc("gcWriteBarrier8")
ir.Syms.Goschedguarded = typecheck.LookupRuntimeFunc("goschedguarded")
ir.Syms.Growslice = typecheck.LookupRuntimeFunc("growslice")
ir.Syms.GrowsliceBuf = typecheck.LookupRuntimeFunc("growsliceBuf")
ir.Syms.MoveSlice = typecheck.LookupRuntimeFunc("moveSlice")
ir.Syms.MoveSliceNoScan = typecheck.LookupRuntimeFunc("moveSliceNoScan")
ir.Syms.MoveSliceNoCap = typecheck.LookupRuntimeFunc("moveSliceNoCap")
ir.Syms.MoveSliceNoCapNoScan = typecheck.LookupRuntimeFunc("moveSliceNoCapNoScan")
ir.Syms.InterfaceSwitch = typecheck.LookupRuntimeFunc("interfaceSwitch")
for i := 1; i < len(ir.Syms.MallocGCSmallNoScan); i++ {
ir.Syms.MallocGCSmallNoScan[i] = typecheck.LookupRuntimeFunc(fmt.Sprintf("mallocgcSmallNoScanSC%d", i))
@ -1096,6 +1101,23 @@ type state struct {
// Block starting position, indexed by block id.
blockStarts []src.XPos
// Information for stack allocation. Indexed by the first argument
// to an append call. Normally a slice-typed variable, but not always.
backingStores map[ir.Node]*backingStoreInfo
}
type backingStoreInfo struct {
// Size of backing store array (in elements)
K int64
// Stack-allocated backing store variable.
store *ir.Name
// Dynamic boolean variable marking the fact that we used this backing store.
used *ir.Name
// Have we used this variable statically yet? This is just a hint
// to avoid checking the dynamic variable if the answer is obvious.
// (usedStatic == true implies used == true)
usedStatic bool
}
type funcLine struct {
@ -3683,6 +3705,9 @@ func (s *state) exprCheckPtr(n ir.Node, checkPtrOK bool) *ssa.Value {
case ir.OAPPEND:
return s.append(n.(*ir.CallExpr), false)
case ir.OMOVE2HEAP:
return s.move2heap(n.(*ir.MoveToHeapExpr))
case ir.OMIN, ir.OMAX:
return s.minMax(n.(*ir.CallExpr))
@ -3744,6 +3769,68 @@ func (s *state) resultAddrOfCall(c *ssa.Value, which int64, t *types.Type) *ssa.
return addr
}
// Get backing store information for an append call.
func (s *state) getBackingStoreInfoForAppend(n *ir.CallExpr) *backingStoreInfo {
if n.Esc() != ir.EscNone {
return nil
}
return s.getBackingStoreInfo(n.Args[0])
}
func (s *state) getBackingStoreInfo(n ir.Node) *backingStoreInfo {
t := n.Type()
et := t.Elem()
maxStackSize := int64(base.Debug.VariableMakeThreshold)
if et.Size() == 0 || et.Size() > maxStackSize {
return nil
}
if base.Flag.N != 0 {
return nil
}
if !base.VariableMakeHash.MatchPos(n.Pos(), nil) {
return nil
}
i := s.backingStores[n]
if i != nil {
return i
}
// Build type of backing store.
K := maxStackSize / et.Size() // rounds down
KT := types.NewArray(et, K)
KT.SetNoalg(true)
types.CalcArraySize(KT)
// Align more than naturally for the type KT. See issue 73199.
align := types.NewArray(types.Types[types.TUINTPTR], 0)
types.CalcArraySize(align)
storeTyp := types.NewStruct([]*types.Field{
{Sym: types.BlankSym, Type: align},
{Sym: types.BlankSym, Type: KT},
})
storeTyp.SetNoalg(true)
types.CalcStructSize(storeTyp)
// Make backing store variable.
backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp)
backingStore.SetAddrtaken(true)
// Make "used" boolean.
used := typecheck.TempAt(n.Pos(), s.curfn, types.Types[types.TBOOL])
if s.curBlock == s.f.Entry {
s.vars[used] = s.constBool(false)
} else {
// initialize this variable at end of entry block
s.defvars[s.f.Entry.ID][used] = s.constBool(false)
}
// Initialize an info structure.
if s.backingStores == nil {
s.backingStores = map[ir.Node]*backingStoreInfo{}
}
i = &backingStoreInfo{K: K, store: backingStore, used: used, usedStatic: false}
s.backingStores[n] = i
return i
}
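A worked example of the sizing above, under the assumption that VariableMakeThreshold is 32 bytes (it is a debug-tunable, so the exact default may differ): for an element type of int64 (8 bytes), K = 32/8 = 4, so the backing store variable has type struct{ _ [0]uintptr; arr [4]int64 }, with the zero-length uintptr array forcing extra alignment per issue 73199.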
// append converts an OAPPEND node to SSA.
// If inplace is false, it converts the OAPPEND expression n to an ssa.Value,
// adds it to s, and returns the Value.
@ -3834,9 +3921,29 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
// A stack-allocated backing store could be used at every
// append that qualifies, but we limit it in some cases to
// avoid wasted code and stack space.
// TODO: handle ... append case.
maxStackSize := int64(base.Debug.VariableMakeThreshold)
if !inplace && n.Esc() == ir.EscNone && et.Size() > 0 && et.Size() <= maxStackSize && base.Flag.N == 0 && base.VariableMakeHash.MatchPos(n.Pos(), nil) && !s.appendTargets[sn] {
//
// Note that we have two different strategies.
// 1. The standard strategy is just to allocate the full
// backing store at the first append.
// 2. An alternate strategy is used when
// a. The backing store eventually escapes via move2heap
// and b. The capacity is used somehow
// In this case, we don't want to just allocate
// the full buffer at the first append, because when
// move2heap copies the buffer to the heap at the escape point,
// we might end up wasting memory because we can't
// change the capacity.
// So in this case we use growsliceBuf to reuse the buffer
// and walk one step up the size class ladder each time.
//
// TODO: handle ... append case? Currently we handle only
// a fixed number of appended elements.
var info *backingStoreInfo
if !inplace {
info = s.getBackingStoreInfoForAppend(n)
}
if !inplace && info != nil && !n.UseBuf && !info.usedStatic {
// if l <= K {
// if !used {
// if oldLen == 0 {
@ -3860,43 +3967,19 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
// It is ok to do it more often, but it is probably helpful only for
// the first instance. TODO: this could use more tuning. Using ir.Node
// as the key works for *ir.Name instances but probably nothing else.
if s.appendTargets == nil {
s.appendTargets = map[ir.Node]bool{}
}
s.appendTargets[sn] = true
K := maxStackSize / et.Size() // rounds down
KT := types.NewArray(et, K)
KT.SetNoalg(true)
types.CalcArraySize(KT)
// Align more than naturally for the type KT. See issue 73199.
align := types.NewArray(types.Types[types.TUINTPTR], 0)
types.CalcArraySize(align)
storeTyp := types.NewStruct([]*types.Field{
{Sym: types.BlankSym, Type: align},
{Sym: types.BlankSym, Type: KT},
})
storeTyp.SetNoalg(true)
types.CalcStructSize(storeTyp)
info.usedStatic = true
// TODO: unset usedStatic somehow?
usedTestBlock := s.f.NewBlock(ssa.BlockPlain)
oldLenTestBlock := s.f.NewBlock(ssa.BlockPlain)
bodyBlock := s.f.NewBlock(ssa.BlockPlain)
growSlice := s.f.NewBlock(ssa.BlockPlain)
// Make "used" boolean.
tBool := types.Types[types.TBOOL]
used := typecheck.TempAt(n.Pos(), s.curfn, tBool)
s.defvars[s.f.Entry.ID][used] = s.constBool(false) // initialize this variable at fn entry
// Make backing store variable.
tInt := types.Types[types.TINT]
backingStore := typecheck.TempAt(n.Pos(), s.curfn, storeTyp)
backingStore.SetAddrtaken(true)
tBool := types.Types[types.TBOOL]
// if l <= K
s.startBlock(grow)
kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, K))
kTest := s.newValue2(s.ssaOp(ir.OLE, tInt), tBool, l, s.constInt(tInt, info.K))
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(kTest)
@ -3906,7 +3989,7 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
// if !used
s.startBlock(usedTestBlock)
usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(used))
usedTest := s.newValue1(ssa.OpNot, tBool, s.expr(info.used))
b = s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(usedTest)
@ -3927,18 +4010,18 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
// var store struct { _ [0]uintptr; arr [K]T }
s.startBlock(bodyBlock)
if et.HasPointers() {
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, backingStore, s.mem())
s.vars[memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, info.store, s.mem())
}
addr := s.addr(backingStore)
s.zero(storeTyp, addr)
addr := s.addr(info.store)
s.zero(info.store.Type(), addr)
// s = store.arr[:l:K]
s.vars[ptrVar] = addr
s.vars[lenVar] = l // nargs would also be ok because of the oldLen==0 test.
s.vars[capVar] = s.constInt(tInt, K)
s.vars[capVar] = s.constInt(tInt, info.K)
// used = true
s.assign(used, s.constBool(true), false, 0)
s.assign(info.used, s.constBool(true), false, 0)
b = s.endBlock()
b.AddEdgeTo(assign)
@ -3949,7 +4032,25 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
// Call growslice
s.startBlock(grow)
taddr := s.expr(n.Fun)
r := s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr)
var r []*ssa.Value
if info != nil && n.UseBuf {
// Use stack-allocated buffer as backing store, if we can.
if et.HasPointers() && !info.usedStatic {
// Initialize in the function header. Not the best place,
// but it makes sure we don't scan this area before it is
// initialized.
mem := s.defvars[s.f.Entry.ID][memVar]
mem = s.f.Entry.NewValue1A(n.Pos(), ssa.OpVarDef, types.TypeMem, info.store, mem)
addr := s.f.Entry.NewValue2A(n.Pos(), ssa.OpLocalAddr, types.NewPtr(info.store.Type()), info.store, s.sp, mem)
mem = s.f.Entry.NewValue2I(n.Pos(), ssa.OpZero, types.TypeMem, info.store.Type().Size(), addr, mem)
mem.Aux = info.store.Type()
s.defvars[s.f.Entry.ID][memVar] = mem
info.usedStatic = true
}
r = s.rtcall(ir.Syms.GrowsliceBuf, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr, s.addr(info.store), s.constInt(types.Types[types.TINT], info.K))
} else {
r = s.rtcall(ir.Syms.Growslice, true, []*types.Type{n.Type()}, p, l, c, nargs, taddr)
}
// Decompose output slice
p = s.newValue1(ssa.OpSlicePtr, pt, r[0])
@ -4036,6 +4137,95 @@ func (s *state) append(n *ir.CallExpr, inplace bool) *ssa.Value {
return s.newValue3(ssa.OpSliceMake, n.Type(), p, l, c)
}
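A hedged sketch of the source pattern the stack-allocated backing store targets (hypothetical example; the exact qualification depends on escape analysis and the size threshold): a small append whose result never escapes.

package main

//go:noinline
func sum(n int) int {
	var xs []int
	for i := 0; i < n; i++ {
		xs = append(xs, i) // non-escaping and small: backing array may live in the frame
	}
	total := 0
	for _, x := range xs {
		total += x
	}
	return total
}

func main() { _ = sum(10) }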
func (s *state) move2heap(n *ir.MoveToHeapExpr) *ssa.Value {
// s := n.Slice
// if s.ptr points to current stack frame {
// s2 := make([]T, s.len, s.cap)
// copy(s2[:cap], s[:cap])
// s = s2
// }
// return s
slice := s.expr(n.Slice)
et := slice.Type.Elem()
pt := types.NewPtr(et)
info := s.getBackingStoreInfo(n)
if info == nil {
// Backing store will never be stack allocated, so
// move2heap is a no-op.
return slice
}
// Decompose input slice.
p := s.newValue1(ssa.OpSlicePtr, pt, slice)
l := s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], slice)
c := s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], slice)
moveBlock := s.f.NewBlock(ssa.BlockPlain)
mergeBlock := s.f.NewBlock(ssa.BlockPlain)
s.vars[ptrVar] = p
s.vars[lenVar] = l
s.vars[capVar] = c
// Decide if we need to move the slice backing store.
// It needs to be moved if it is currently on the stack.
sub := ssa.OpSub64
less := ssa.OpLess64U
if s.config.PtrSize == 4 {
sub = ssa.OpSub32
less = ssa.OpLess32U
}
callerSP := s.newValue1(ssa.OpGetCallerSP, types.Types[types.TUINTPTR], s.mem())
frameSize := s.newValue2(sub, types.Types[types.TUINTPTR], callerSP, s.sp)
pInt := s.newValue2(ssa.OpConvert, types.Types[types.TUINTPTR], p, s.mem())
off := s.newValue2(sub, types.Types[types.TUINTPTR], pInt, s.sp)
cond := s.newValue2(less, types.Types[types.TBOOL], off, frameSize)
b := s.endBlock()
b.Kind = ssa.BlockIf
b.Likely = ssa.BranchUnlikely // fast path is to not have to call into runtime
b.SetControl(cond)
b.AddEdgeTo(moveBlock)
b.AddEdgeTo(mergeBlock)
// Move the slice to heap
s.startBlock(moveBlock)
var newSlice *ssa.Value
if et.HasPointers() {
typ := s.expr(n.RType)
if n.PreserveCapacity {
newSlice = s.rtcall(ir.Syms.MoveSlice, true, []*types.Type{slice.Type}, typ, p, l, c)[0]
} else {
newSlice = s.rtcall(ir.Syms.MoveSliceNoCap, true, []*types.Type{slice.Type}, typ, p, l)[0]
}
} else {
elemSize := s.constInt(types.Types[types.TUINTPTR], et.Size())
if n.PreserveCapacity {
newSlice = s.rtcall(ir.Syms.MoveSliceNoScan, true, []*types.Type{slice.Type}, elemSize, p, l, c)[0]
} else {
newSlice = s.rtcall(ir.Syms.MoveSliceNoCapNoScan, true, []*types.Type{slice.Type}, elemSize, p, l)[0]
}
}
// Decompose output slice
s.vars[ptrVar] = s.newValue1(ssa.OpSlicePtr, pt, newSlice)
s.vars[lenVar] = s.newValue1(ssa.OpSliceLen, types.Types[types.TINT], newSlice)
s.vars[capVar] = s.newValue1(ssa.OpSliceCap, types.Types[types.TINT], newSlice)
b = s.endBlock()
b.AddEdgeTo(mergeBlock)
// Merge fast path (no moving) and slow path (moved)
s.startBlock(mergeBlock)
p = s.variable(ptrVar, pt) // generates phi for ptr
l = s.variable(lenVar, types.Types[types.TINT]) // generates phi for len
c = s.variable(capVar, types.Types[types.TINT]) // generates phi for cap
delete(s.vars, ptrVar)
delete(s.vars, lenVar)
delete(s.vars, capVar)
return s.newValue3(ssa.OpSliceMake, slice.Type, p, l, c)
}
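A hedged sketch of the kind of code the move2heap lowering is for (hypothetical example): the slice is built with a stack-backed buffer, and at the escape point the runtime check above copies the backing store to the heap only if it still lives in the current frame.

package main

var sink []int // forces the slice to escape at the assignment below

//go:noinline
func build() {
	var xs []int
	for i := 0; i < 4; i++ {
		xs = append(xs, i)
	}
	sink = xs // escape site: a stack-backed store must be moved to the heap here
}

func main() { build() }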
// minMax converts an OMIN/OMAX builtin call into SSA.
func (s *state) minMax(n *ir.CallExpr) *ssa.Value {
// The OMIN/OMAX builtin is variadic, but its semantics are

View file

@ -444,6 +444,19 @@ func testBitwiseRshU_ssa(a uint32, b, c uint32) uint32 {
return a >> b >> c
}
//go:noinline
func orLt_ssa(x int) bool {
y := x - x
return (x | 2) < y
}
// test riscv64 SLTI rules
func testSetIfLessThan(t *testing.T) {
if want, got := true, orLt_ssa(-7); got != want {
t.Errorf("orLt_ssa(-7) = %t want %t", got, want)
}
}
//go:noinline
func testShiftCX_ssa() int {
v1 := uint8(3)
@ -977,6 +990,7 @@ func TestArithmetic(t *testing.T) {
testRegallocCVSpill(t)
testSubqToNegq(t)
testBitwiseLogic(t)
testSetIfLessThan(t)
testOcom(t)
testLrot(t)
testShiftCX(t)

View file

@ -195,6 +195,7 @@ func makeslice(typ *byte, len int, cap int) unsafe.Pointer
func makeslice64(typ *byte, len int64, cap int64) unsafe.Pointer
func makeslicecopy(typ *byte, tolen int, fromlen int, from unsafe.Pointer) unsafe.Pointer
func growslice(oldPtr *any, newLen, oldCap, num int, et *byte) (ary []any)
func growsliceBuf(oldPtr *any, newLen, oldCap, num int, et *byte, buf *any, bufLen int) (ary []any)
func unsafeslicecheckptr(typ *byte, ptr unsafe.Pointer, len int64)
func panicunsafeslicelen()
func panicunsafeslicenilptr()
@ -202,6 +203,11 @@ func unsafestringcheckptr(ptr unsafe.Pointer, len int64)
func panicunsafestringlen()
func panicunsafestringnilptr()
func moveSlice(typ *byte, old *byte, len, cap int) (*byte, int, int)
func moveSliceNoScan(elemSize uintptr, old *byte, len, cap int) (*byte, int, int)
func moveSliceNoCap(typ *byte, old *byte, len int) (*byte, int, int)
func moveSliceNoCapNoScan(elemSize uintptr, old *byte, len int) (*byte, int, int)
func memmove(to *any, frm *any, length uintptr)
func memclrNoHeapPointers(ptr unsafe.Pointer, n uintptr)
func memclrHasPointers(ptr unsafe.Pointer, n uintptr)

View file

@ -160,80 +160,85 @@ var runtimeDecls = [...]struct {
{"makeslice64", funcTag, 124},
{"makeslicecopy", funcTag, 125},
{"growslice", funcTag, 127},
{"unsafeslicecheckptr", funcTag, 128},
{"growsliceBuf", funcTag, 128},
{"unsafeslicecheckptr", funcTag, 129},
{"panicunsafeslicelen", funcTag, 9},
{"panicunsafeslicenilptr", funcTag, 9},
{"unsafestringcheckptr", funcTag, 129},
{"unsafestringcheckptr", funcTag, 130},
{"panicunsafestringlen", funcTag, 9},
{"panicunsafestringnilptr", funcTag, 9},
{"memmove", funcTag, 130},
{"memclrNoHeapPointers", funcTag, 131},
{"memclrHasPointers", funcTag, 131},
{"memequal", funcTag, 132},
{"memequal0", funcTag, 133},
{"memequal8", funcTag, 133},
{"memequal16", funcTag, 133},
{"memequal32", funcTag, 133},
{"memequal64", funcTag, 133},
{"memequal128", funcTag, 133},
{"f32equal", funcTag, 134},
{"f64equal", funcTag, 134},
{"c64equal", funcTag, 134},
{"c128equal", funcTag, 134},
{"strequal", funcTag, 134},
{"interequal", funcTag, 134},
{"nilinterequal", funcTag, 134},
{"memhash", funcTag, 135},
{"memhash0", funcTag, 136},
{"memhash8", funcTag, 136},
{"memhash16", funcTag, 136},
{"memhash32", funcTag, 136},
{"memhash64", funcTag, 136},
{"memhash128", funcTag, 136},
{"f32hash", funcTag, 137},
{"f64hash", funcTag, 137},
{"c64hash", funcTag, 137},
{"c128hash", funcTag, 137},
{"strhash", funcTag, 137},
{"interhash", funcTag, 137},
{"nilinterhash", funcTag, 137},
{"int64div", funcTag, 138},
{"uint64div", funcTag, 139},
{"int64mod", funcTag, 138},
{"uint64mod", funcTag, 139},
{"float64toint64", funcTag, 140},
{"float64touint64", funcTag, 141},
{"float64touint32", funcTag, 142},
{"int64tofloat64", funcTag, 143},
{"int64tofloat32", funcTag, 144},
{"uint64tofloat64", funcTag, 145},
{"uint64tofloat32", funcTag, 146},
{"uint32tofloat64", funcTag, 147},
{"complex128div", funcTag, 148},
{"moveSlice", funcTag, 131},
{"moveSliceNoScan", funcTag, 132},
{"moveSliceNoCap", funcTag, 133},
{"moveSliceNoCapNoScan", funcTag, 134},
{"memmove", funcTag, 135},
{"memclrNoHeapPointers", funcTag, 136},
{"memclrHasPointers", funcTag, 136},
{"memequal", funcTag, 137},
{"memequal0", funcTag, 138},
{"memequal8", funcTag, 138},
{"memequal16", funcTag, 138},
{"memequal32", funcTag, 138},
{"memequal64", funcTag, 138},
{"memequal128", funcTag, 138},
{"f32equal", funcTag, 139},
{"f64equal", funcTag, 139},
{"c64equal", funcTag, 139},
{"c128equal", funcTag, 139},
{"strequal", funcTag, 139},
{"interequal", funcTag, 139},
{"nilinterequal", funcTag, 139},
{"memhash", funcTag, 140},
{"memhash0", funcTag, 141},
{"memhash8", funcTag, 141},
{"memhash16", funcTag, 141},
{"memhash32", funcTag, 141},
{"memhash64", funcTag, 141},
{"memhash128", funcTag, 141},
{"f32hash", funcTag, 142},
{"f64hash", funcTag, 142},
{"c64hash", funcTag, 142},
{"c128hash", funcTag, 142},
{"strhash", funcTag, 142},
{"interhash", funcTag, 142},
{"nilinterhash", funcTag, 142},
{"int64div", funcTag, 143},
{"uint64div", funcTag, 144},
{"int64mod", funcTag, 143},
{"uint64mod", funcTag, 144},
{"float64toint64", funcTag, 145},
{"float64touint64", funcTag, 146},
{"float64touint32", funcTag, 147},
{"int64tofloat64", funcTag, 148},
{"int64tofloat32", funcTag, 149},
{"uint64tofloat64", funcTag, 150},
{"uint64tofloat32", funcTag, 151},
{"uint32tofloat64", funcTag, 152},
{"complex128div", funcTag, 153},
{"racefuncenter", funcTag, 33},
{"racefuncexit", funcTag, 9},
{"raceread", funcTag, 33},
{"racewrite", funcTag, 33},
{"racereadrange", funcTag, 149},
{"racewriterange", funcTag, 149},
{"msanread", funcTag, 149},
{"msanwrite", funcTag, 149},
{"msanmove", funcTag, 150},
{"asanread", funcTag, 149},
{"asanwrite", funcTag, 149},
{"checkptrAlignment", funcTag, 151},
{"checkptrArithmetic", funcTag, 153},
{"libfuzzerTraceCmp1", funcTag, 154},
{"libfuzzerTraceCmp2", funcTag, 155},
{"libfuzzerTraceCmp4", funcTag, 156},
{"libfuzzerTraceCmp8", funcTag, 157},
{"libfuzzerTraceConstCmp1", funcTag, 154},
{"libfuzzerTraceConstCmp2", funcTag, 155},
{"libfuzzerTraceConstCmp4", funcTag, 156},
{"libfuzzerTraceConstCmp8", funcTag, 157},
{"libfuzzerHookStrCmp", funcTag, 158},
{"libfuzzerHookEqualFold", funcTag, 158},
{"addCovMeta", funcTag, 160},
{"racereadrange", funcTag, 154},
{"racewriterange", funcTag, 154},
{"msanread", funcTag, 154},
{"msanwrite", funcTag, 154},
{"msanmove", funcTag, 155},
{"asanread", funcTag, 154},
{"asanwrite", funcTag, 154},
{"checkptrAlignment", funcTag, 156},
{"checkptrArithmetic", funcTag, 158},
{"libfuzzerTraceCmp1", funcTag, 159},
{"libfuzzerTraceCmp2", funcTag, 160},
{"libfuzzerTraceCmp4", funcTag, 161},
{"libfuzzerTraceCmp8", funcTag, 162},
{"libfuzzerTraceConstCmp1", funcTag, 159},
{"libfuzzerTraceConstCmp2", funcTag, 160},
{"libfuzzerTraceConstCmp4", funcTag, 161},
{"libfuzzerTraceConstCmp8", funcTag, 162},
{"libfuzzerHookStrCmp", funcTag, 163},
{"libfuzzerHookEqualFold", funcTag, 163},
{"addCovMeta", funcTag, 165},
{"x86HasAVX", varTag, 6},
{"x86HasFMA", varTag, 6},
{"x86HasPOPCNT", varTag, 6},
@ -244,11 +249,11 @@ var runtimeDecls = [...]struct {
{"loong64HasLAM_BH", varTag, 6},
{"loong64HasLSX", varTag, 6},
{"riscv64HasZbb", varTag, 6},
{"asanregisterglobals", funcTag, 131},
{"asanregisterglobals", funcTag, 136},
}
func runtimeTypes() []*types.Type {
var typs [161]*types.Type
var typs [166]*types.Type
typs[0] = types.ByteType
typs[1] = types.NewPtr(typs[0])
typs[2] = types.Types[types.TANY]
@ -377,39 +382,44 @@ func runtimeTypes() []*types.Type {
typs[125] = newSig(params(typs[1], typs[13], typs[13], typs[7]), params(typs[7]))
typs[126] = types.NewSlice(typs[2])
typs[127] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1]), params(typs[126]))
typs[128] = newSig(params(typs[1], typs[7], typs[22]), nil)
typs[129] = newSig(params(typs[7], typs[22]), nil)
typs[130] = newSig(params(typs[3], typs[3], typs[5]), nil)
typs[131] = newSig(params(typs[7], typs[5]), nil)
typs[132] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6]))
typs[133] = newSig(params(typs[3], typs[3]), params(typs[6]))
typs[134] = newSig(params(typs[7], typs[7]), params(typs[6]))
typs[135] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5]))
typs[136] = newSig(params(typs[7], typs[5]), params(typs[5]))
typs[137] = newSig(params(typs[3], typs[5]), params(typs[5]))
typs[138] = newSig(params(typs[22], typs[22]), params(typs[22]))
typs[139] = newSig(params(typs[24], typs[24]), params(typs[24]))
typs[140] = newSig(params(typs[18]), params(typs[22]))
typs[141] = newSig(params(typs[18]), params(typs[24]))
typs[142] = newSig(params(typs[18]), params(typs[67]))
typs[143] = newSig(params(typs[22]), params(typs[18]))
typs[144] = newSig(params(typs[22]), params(typs[20]))
typs[145] = newSig(params(typs[24]), params(typs[18]))
typs[146] = newSig(params(typs[24]), params(typs[20]))
typs[147] = newSig(params(typs[67]), params(typs[18]))
typs[148] = newSig(params(typs[26], typs[26]), params(typs[26]))
typs[149] = newSig(params(typs[5], typs[5]), nil)
typs[150] = newSig(params(typs[5], typs[5], typs[5]), nil)
typs[151] = newSig(params(typs[7], typs[1], typs[5]), nil)
typs[152] = types.NewSlice(typs[7])
typs[153] = newSig(params(typs[7], typs[152]), nil)
typs[154] = newSig(params(typs[71], typs[71], typs[15]), nil)
typs[155] = newSig(params(typs[65], typs[65], typs[15]), nil)
typs[156] = newSig(params(typs[67], typs[67], typs[15]), nil)
typs[157] = newSig(params(typs[24], typs[24], typs[15]), nil)
typs[158] = newSig(params(typs[30], typs[30], typs[15]), nil)
typs[159] = types.NewArray(typs[0], 16)
typs[160] = newSig(params(typs[7], typs[67], typs[159], typs[30], typs[13], typs[71], typs[71]), params(typs[67]))
typs[128] = newSig(params(typs[3], typs[13], typs[13], typs[13], typs[1], typs[3], typs[13]), params(typs[126]))
typs[129] = newSig(params(typs[1], typs[7], typs[22]), nil)
typs[130] = newSig(params(typs[7], typs[22]), nil)
typs[131] = newSig(params(typs[1], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13]))
typs[132] = newSig(params(typs[5], typs[1], typs[13], typs[13]), params(typs[1], typs[13], typs[13]))
typs[133] = newSig(params(typs[1], typs[1], typs[13]), params(typs[1], typs[13], typs[13]))
typs[134] = newSig(params(typs[5], typs[1], typs[13]), params(typs[1], typs[13], typs[13]))
typs[135] = newSig(params(typs[3], typs[3], typs[5]), nil)
typs[136] = newSig(params(typs[7], typs[5]), nil)
typs[137] = newSig(params(typs[3], typs[3], typs[5]), params(typs[6]))
typs[138] = newSig(params(typs[3], typs[3]), params(typs[6]))
typs[139] = newSig(params(typs[7], typs[7]), params(typs[6]))
typs[140] = newSig(params(typs[3], typs[5], typs[5]), params(typs[5]))
typs[141] = newSig(params(typs[7], typs[5]), params(typs[5]))
typs[142] = newSig(params(typs[3], typs[5]), params(typs[5]))
typs[143] = newSig(params(typs[22], typs[22]), params(typs[22]))
typs[144] = newSig(params(typs[24], typs[24]), params(typs[24]))
typs[145] = newSig(params(typs[18]), params(typs[22]))
typs[146] = newSig(params(typs[18]), params(typs[24]))
typs[147] = newSig(params(typs[18]), params(typs[67]))
typs[148] = newSig(params(typs[22]), params(typs[18]))
typs[149] = newSig(params(typs[22]), params(typs[20]))
typs[150] = newSig(params(typs[24]), params(typs[18]))
typs[151] = newSig(params(typs[24]), params(typs[20]))
typs[152] = newSig(params(typs[67]), params(typs[18]))
typs[153] = newSig(params(typs[26], typs[26]), params(typs[26]))
typs[154] = newSig(params(typs[5], typs[5]), nil)
typs[155] = newSig(params(typs[5], typs[5], typs[5]), nil)
typs[156] = newSig(params(typs[7], typs[1], typs[5]), nil)
typs[157] = types.NewSlice(typs[7])
typs[158] = newSig(params(typs[7], typs[157]), nil)
typs[159] = newSig(params(typs[71], typs[71], typs[15]), nil)
typs[160] = newSig(params(typs[65], typs[65], typs[15]), nil)
typs[161] = newSig(params(typs[67], typs[67], typs[15]), nil)
typs[162] = newSig(params(typs[24], typs[24], typs[15]), nil)
typs[163] = newSig(params(typs[30], typs[30], typs[15]), nil)
typs[164] = types.NewArray(typs[0], 16)
typs[165] = newSig(params(typs[7], typs[67], typs[164], typs[30], typs[13], typs[71], typs[71]), params(typs[67]))
return typs[:]
}

View file

@ -1842,26 +1842,7 @@ func IsReflexive(t *Type) bool {
// Can this type be stored directly in an interface word?
// Yes, if the representation is a single pointer.
func IsDirectIface(t *Type) bool {
switch t.Kind() {
case TPTR:
// Pointers to notinheap types must be stored indirectly. See issue 42076.
return !t.Elem().NotInHeap()
case TCHAN,
TMAP,
TFUNC,
TUNSAFEPTR:
return true
case TARRAY:
// Array of 1 direct iface type can be direct.
return t.NumElem() == 1 && IsDirectIface(t.Elem())
case TSTRUCT:
// Struct with 1 field of direct iface type can be direct.
return t.NumFields() == 1 && IsDirectIface(t.Field(0).Type)
}
return false
return t.Size() == int64(PtrSize) && PtrDataSize(t) == int64(PtrSize)
}
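The one-liner above captures the same set as the deleted switch: a type is stored directly in an interface word exactly when it is pointer-shaped, that is, exactly one pointer-sized word, all of it pointer. A few illustrative types (hypothetical, for intuition only):

	package p

	import "unsafe"

	// Pointer-shaped (direct): Size == PtrSize and PtrDataSize == PtrSize.
	var (
		_ *int               // pointer
		_ chan int           // channel
		_ unsafe.Pointer     // unsafe pointer
		_ [1]map[string]int  // 1-element array of a direct type
		_ struct{ f func() } // 1-field struct of a direct type
	)

	// Not pointer-shaped (indirect): wrong size, or no pointer bits.
	var (
		_ uintptr // one word, but PtrDataSize == 0
		_ [2]*int // two words
		_ string  // two words (ptr, len)
	)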
// IsInterfaceMethod reports whether (field) m is

View file

@ -175,7 +175,8 @@ type Checker struct {
methods map[*TypeName][]*Func // maps package scope type names to associated non-blank (non-interface) methods
untyped map[syntax.Expr]exprInfo // map of expressions without final type
delayed []action // stack of delayed action segments; segments are processed in FIFO order
objPath []Object // path of object dependencies during type inference (for cycle reporting)
objPath []Object // path of object dependencies during type-checking (for cycle reporting)
objPathIdx map[Object]int // map of object to object path index during type-checking (for cycle reporting)
cleaners []cleaner // list of types that may need a final cleanup at the end of type-checking
// environment within which the current object is type-checked (valid only
@ -248,19 +249,22 @@ func (check *Checker) later(f func()) *action {
return &check.delayed[i]
}
// push pushes obj onto the object path and returns its index in the path.
func (check *Checker) push(obj Object) int {
// push pushes obj onto the object path and records its index in the path index map.
func (check *Checker) push(obj Object) {
if check.objPathIdx == nil {
check.objPathIdx = make(map[Object]int)
}
check.objPathIdx[obj] = len(check.objPath)
check.objPath = append(check.objPath, obj)
return len(check.objPath) - 1
}
// pop pops and returns the topmost object from the object path.
func (check *Checker) pop() Object {
// pop pops an object from the object path and removes it from the path index map.
func (check *Checker) pop() {
i := len(check.objPath) - 1
obj := check.objPath[i]
check.objPath[i] = nil
check.objPath[i] = nil // help the garbage collector
check.objPath = check.objPath[:i]
return obj
delete(check.objPathIdx, obj)
}
type cleaner interface {
@ -319,6 +323,7 @@ func (check *Checker) initFiles(files []*syntax.File) {
check.untyped = nil
check.delayed = nil
check.objPath = nil
check.objPathIdx = nil
check.cleaners = nil
// We must initialize usedVars and usedPkgNames both here and in NewChecker,

View file

@ -54,7 +54,6 @@ func (check *Checker) directCycle(tname *TypeName, pathIdx map[*TypeName]int) {
// tname is marked grey - we have a cycle on the path beginning at start.
// Mark tname as invalid.
tname.setType(Typ[Invalid])
tname.setColor(black)
// collect type names on cycle
var cycle []Object

View file

@ -62,114 +62,77 @@ func (check *Checker) objDecl(obj Object, def *TypeName) {
if check.indent == 0 {
fmt.Println() // empty line between top-level objects for readability
}
check.trace(obj.Pos(), "-- checking %s (%s, objPath = %s)", obj, obj.color(), pathString(check.objPath))
check.trace(obj.Pos(), "-- checking %s (objPath = %s)", obj, pathString(check.objPath))
check.indent++
defer func() {
check.indent--
check.trace(obj.Pos(), "=> %s (%s)", obj, obj.color())
check.trace(obj.Pos(), "=> %s", obj)
}()
}
// Checking the declaration of obj means inferring its type
// (and possibly its value, for constants).
// An object's type (and thus the object) may be in one of
// three states which are expressed by colors:
// Checking the declaration of an object means determining its type
// (and also its value for constants). An object (and thus its type)
// may be in 1 of 3 states:
//
// - an object whose type is not yet known is painted white (initial color)
// - an object whose type is in the process of being inferred is painted grey
// - an object whose type is fully inferred is painted black
// - not in Checker.objPathIdx and type == nil : type is not yet known (white)
// - in Checker.objPathIdx : type is pending (grey)
// - not in Checker.objPathIdx and type != nil : type is known (black)
//
// During type inference, an object's color changes from white to grey
// to black (pre-declared objects are painted black from the start).
// A black object (i.e., its type) can only depend on (refer to) other black
// ones. White and grey objects may depend on white and black objects.
// A dependency on a grey object indicates a cycle which may or may not be
// valid.
// During type-checking, an object changes from white to grey to black.
// Predeclared objects start as black (their type is known without checking).
//
// When objects turn grey, they are pushed on the object path (a stack);
// they are popped again when they turn black. Thus, if a grey object (a
// cycle) is encountered, it is on the object path, and all the objects
// it depends on are the remaining objects on that path. Color encoding
// is such that the color value of a grey object indicates the index of
// that object in the object path.
// A black object may only depend on (refer to) to other black objects. White
// and grey objects may depend on white or black objects. A dependency on a
// grey object indicates a (possibly invalid) cycle.
//
// When an object is marked grey, it is pushed onto the object path (a stack)
// and its index in the path is recorded in the path index map. It is popped
// and removed from the map when its type is determined (and marked black).
// During type-checking, white objects may be assigned a type without
// traversing through objDecl; e.g., when initializing constants and
// variables. Update the colors of those objects here (rather than
// everywhere where we set the type) to satisfy the color invariants.
if obj.color() == white && obj.Type() != nil {
obj.setColor(black)
return
}
switch obj.color() {
case white:
assert(obj.Type() == nil)
// All color values other than white and black are considered grey.
// Because black and white are < grey, all values >= grey are grey.
// Use those values to encode the object's index into the object path.
obj.setColor(grey + color(check.push(obj)))
defer func() {
check.pop().setColor(black)
}()
case black:
assert(obj.Type() != nil)
return
default:
// Color values other than white or black are considered grey.
fallthrough
case grey:
// We have a (possibly invalid) cycle.
// In the existing code, this is marked by a non-nil type
// for the object except for constants and variables whose
// type may be non-nil (known), or nil if it depends on the
// not-yet known initialization value.
// In the former case, set the type to Typ[Invalid] because
// we have an initialization cycle. The cycle error will be
// reported later, when determining initialization order.
// TODO(gri) Report cycle here and simplify initialization
// order code.
// If this object is grey, we have a (possibly invalid) cycle. This is signaled
// by a non-nil type for the object, except for constants and variables whose
// type may be non-nil (known), or nil if it depends on a not-yet known
// initialization value.
//
// In the former case, set the type to Typ[Invalid] because we have an
// initialization cycle. The cycle error will be reported later, when
// determining initialization order.
//
// TODO(gri) Report cycle here and simplify initialization order code.
if _, ok := check.objPathIdx[obj]; ok {
switch obj := obj.(type) {
case *Const:
if !check.validCycle(obj) || obj.typ == nil {
obj.typ = Typ[Invalid]
case *Const, *Var:
if !check.validCycle(obj) || obj.Type() == nil {
obj.setType(Typ[Invalid])
}
case *Var:
if !check.validCycle(obj) || obj.typ == nil {
obj.typ = Typ[Invalid]
}
case *TypeName:
if !check.validCycle(obj) {
// break cycle
// (without this, calling underlying()
// below may lead to an endless loop
// if we have a cycle for a defined
// (*Named) type)
obj.typ = Typ[Invalid]
obj.setType(Typ[Invalid])
}
case *Func:
if !check.validCycle(obj) {
// Don't set obj.typ to Typ[Invalid] here
// because plenty of code type-asserts that
// functions have a *Signature type. Grey
// functions have their type set to an empty
// signature which makes it impossible to
// Don't set type to Typ[Invalid]; plenty of code asserts that
// functions have a *Signature type. Instead, leave the type
// as an empty signature, which makes it impossible to
// initialize a variable with the function.
}
default:
panic("unreachable")
}
assert(obj.Type() != nil)
return
}
if obj.Type() != nil { // black, meaning it's already type-checked
return
}
// white, meaning it must be type-checked
check.push(obj)
defer check.pop()
d := check.objMap[obj]
if d == nil {
check.dump("%v: %s should have been declared", obj.Pos(), obj)
@ -221,8 +184,8 @@ func (check *Checker) validCycle(obj Object) (valid bool) {
}
// Count cycle objects.
assert(obj.color() >= grey)
start := obj.color() - grey // index of obj in objPath
start, found := check.objPathIdx[obj]
assert(found)
cycle := check.objPath[start:]
tparCycle := false // if set, the cycle is through a type parameter list
nval := 0 // number of (constant or variable) values in the cycle
@ -532,11 +495,16 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN
check.collectTypeParams(&alias.tparams, tdecl.TParamList)
}
rhs = check.definedType(tdecl.Type, obj)
rhs = check.declaredType(tdecl.Type, obj)
assert(rhs != nil)
alias.fromRHS = rhs
unalias(alias) // populate alias.actual
// spec: "In an alias declaration the given type cannot be a type parameter declared in the same declaration."
// (see also go.dev/issue/75884, go.dev/issue/75885)
if tpar, ok := rhs.(*TypeParam); ok && alias.tparams != nil && slices.Index(alias.tparams.list(), tpar) >= 0 {
check.error(tdecl.Type, MisplacedTypeParam, "cannot use type parameter declared in alias declaration as RHS")
alias.fromRHS = Typ[Invalid]
}
} else {
if !versionErr && tparam0 != nil {
check.error(tdecl, UnsupportedFeature, "generic type alias requires GODEBUG=gotypesalias=1 or unset")
@ -576,7 +544,7 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN
check.collectTypeParams(&named.tparams, tdecl.TParamList)
}
rhs = check.definedType(tdecl.Type, obj)
rhs = check.declaredType(tdecl.Type, obj)
assert(rhs != nil)
named.fromRHS = rhs
@ -764,17 +732,8 @@ func (check *Checker) funcDecl(obj *Func, decl *declInfo) {
sig := new(Signature)
obj.typ = sig // guard against cycles
// Avoid cycle error when referring to method while type-checking the signature.
// This avoids a nuisance in the best case (non-parameterized receiver type) and
// since the method is not a type, we get an error. If we have a parameterized
// receiver type, instantiating the receiver type leads to the instantiation of
// its methods, and we don't want a cycle error in that case.
// TODO(gri) review if this is correct and/or whether we still need this?
saved := obj.color_
obj.color_ = black
fdecl := decl.fdecl
check.funcType(sig, fdecl.Recv, fdecl.TParamList, fdecl.Type)
obj.color_ = saved
// Set the scope's extent to the complete "func (...) { ... }"
// so that Scope.Innermost works correctly.
@ -921,10 +880,9 @@ func (check *Checker) declStmt(list []syntax.Decl) {
// the innermost containing block."
scopePos := s.Name.Pos()
check.declare(check.scope, s.Name, obj, scopePos)
// mark and unmark type before calling typeDecl; its type is still nil (see Checker.objDecl)
obj.setColor(grey + color(check.push(obj)))
check.push(obj) // mark as grey
check.typeDecl(obj, s, nil)
check.pop().setColor(black)
check.pop()
default:
check.errorf(s, InvalidSyntaxTree, "unknown syntax.Decl node %T", s)

View file
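With the colors gone, grey is implicit: an object is grey exactly while it sits on objPath (its index cached in objPathIdx), and black once its type is set and it is off the path. The same path-stack pattern in a self-contained sketch, using strings instead of the checker's Object type:

	// detectCycle visits deps depth-first. A node is "grey" exactly while
	// it is on path; finding a dependency already in pathIdx yields the
	// cycle, just as validCycle slices objPath[start:].
	func detectCycle(deps map[string][]string, root string) []string {
		var path []string
		pathIdx := make(map[string]int)
		black := make(map[string]bool)
		var visit func(string) []string
		visit = func(n string) []string {
			if i, ok := pathIdx[n]; ok {
				return append([]string(nil), path[i:]...) // cycle found
			}
			if black[n] {
				return nil
			}
			pathIdx[n] = len(path) // push: n turns grey
			path = append(path, n)
			for _, d := range deps[n] {
				if cyc := visit(d); cyc != nil {
					return cyc
				}
			}
			path = path[:len(path)-1] // pop: n turns black
			delete(pathIdx, n)
			black[n] = true
			return nil
		}
		return visit(root)
	}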

@ -42,18 +42,12 @@ type Object interface {
// 0 for all other objects (including objects in file scopes).
order() uint32
// color returns the object's color.
color() color
// setType sets the type of the object.
setType(Type)
// setOrder sets the order number of the object. It must be > 0.
setOrder(uint32)
// setColor sets the object's color. It must not be white.
setColor(color color)
// setParent sets the parent scope of the object.
setParent(*Scope)
@ -102,41 +96,9 @@ type object struct {
name string
typ Type
order_ uint32
color_ color
scopePos_ syntax.Pos
}
// color encodes the color of an object (see Checker.objDecl for details).
type color uint32
// An object may be painted in one of three colors.
// Color values other than white or black are considered grey.
const (
white color = iota
black
grey // must be > white and black
)
func (c color) String() string {
switch c {
case white:
return "white"
case black:
return "black"
default:
return "grey"
}
}
// colorFor returns the (initial) color for an object depending on
// whether its type t is known or not.
func colorFor(t Type) color {
if t != nil {
return black
}
return white
}
// Parent returns the scope in which the object is declared.
// The result is nil for methods and struct fields.
func (obj *object) Parent() *Scope { return obj.parent }
@ -164,13 +126,11 @@ func (obj *object) Id() string { return Id(obj.pkg, obj.name) }
func (obj *object) String() string { panic("abstract") }
func (obj *object) order() uint32 { return obj.order_ }
func (obj *object) color() color { return obj.color_ }
func (obj *object) scopePos() syntax.Pos { return obj.scopePos_ }
func (obj *object) setParent(parent *Scope) { obj.parent = parent }
func (obj *object) setType(typ Type) { obj.typ = typ }
func (obj *object) setOrder(order uint32) { assert(order > 0); obj.order_ = order }
func (obj *object) setColor(color color) { assert(color != white); obj.color_ = color }
func (obj *object) setScopePos(pos syntax.Pos) { obj.scopePos_ = pos }
func (obj *object) sameId(pkg *Package, name string, foldCase bool) bool {
@ -247,7 +207,7 @@ type PkgName struct {
// NewPkgName returns a new PkgName object representing an imported package.
// The remaining arguments set the attributes found with all Objects.
func NewPkgName(pos syntax.Pos, pkg *Package, name string, imported *Package) *PkgName {
return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, black, nopos}, imported}
return &PkgName{object{nil, pos, pkg, name, Typ[Invalid], 0, nopos}, imported}
}
// Imported returns the package that was imported.
@ -263,7 +223,7 @@ type Const struct {
// NewConst returns a new constant with value val.
// The remaining arguments set the attributes found with all Objects.
func NewConst(pos syntax.Pos, pkg *Package, name string, typ Type, val constant.Value) *Const {
return &Const{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, val}
return &Const{object{nil, pos, pkg, name, typ, 0, nopos}, val}
}
// Val returns the constant's value.
@ -288,7 +248,7 @@ type TypeName struct {
// argument for NewNamed, which will set the TypeName's type as a side-
// effect.
func NewTypeName(pos syntax.Pos, pkg *Package, name string, typ Type) *TypeName {
return &TypeName{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}}
return &TypeName{object{nil, pos, pkg, name, typ, 0, nopos}}
}
// NewTypeNameLazy returns a new defined type like NewTypeName, but it
@ -402,7 +362,7 @@ func NewField(pos syntax.Pos, pkg *Package, name string, typ Type, embedded bool
// newVar returns a new variable.
// The arguments set the attributes found with all Objects.
func newVar(kind VarKind, pos syntax.Pos, pkg *Package, name string, typ Type) *Var {
return &Var{object: object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, kind: kind}
return &Var{object: object{nil, pos, pkg, name, typ, 0, nopos}, kind: kind}
}
// Anonymous reports whether the variable is an embedded field.
@ -452,7 +412,7 @@ func NewFunc(pos syntax.Pos, pkg *Package, name string, sig *Signature) *Func {
// as this would violate object.{Type,color} invariants.
// TODO(adonovan): propose to disallow NewFunc with nil *Signature.
}
return &Func{object{nil, pos, pkg, name, typ, 0, colorFor(typ), nopos}, false, nil}
return &Func{object{nil, pos, pkg, name, typ, 0, nopos}, false, nil}
}
// Signature returns the signature (type) of the function or method.
@ -534,7 +494,7 @@ type Label struct {
// NewLabel returns a new label.
func NewLabel(pos syntax.Pos, pkg *Package, name string) *Label {
return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid], color_: black}, false}
return &Label{object{pos: pos, pkg: pkg, name: name, typ: Typ[Invalid]}, false}
}
// A Builtin represents a built-in function.
@ -545,7 +505,7 @@ type Builtin struct {
}
func newBuiltin(id builtinId) *Builtin {
return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid], color_: black}, id}
return &Builtin{object{name: predeclaredFuncs[id].name, typ: Typ[Invalid]}, id}
}
// Nil represents the predeclared value nil.

View file

@ -217,10 +217,8 @@ func (*lazyObject) Exported() bool { panic("unreachable") }
func (*lazyObject) Id() string { panic("unreachable") }
func (*lazyObject) String() string { panic("unreachable") }
func (*lazyObject) order() uint32 { panic("unreachable") }
func (*lazyObject) color() color { panic("unreachable") }
func (*lazyObject) setType(Type) { panic("unreachable") }
func (*lazyObject) setOrder(uint32) { panic("unreachable") }
func (*lazyObject) setColor(color color) { panic("unreachable") }
func (*lazyObject) setParent(*Scope) { panic("unreachable") }
func (*lazyObject) sameId(*Package, string, bool) bool { panic("unreachable") }
func (*lazyObject) scopePos() syntax.Pos { panic("unreachable") }

View file

@ -36,14 +36,14 @@ func TestSizeof(t *testing.T) {
{term{}, 12, 24},
// Objects
{PkgName{}, 60, 96},
{Const{}, 64, 104},
{TypeName{}, 56, 88},
{Var{}, 64, 104},
{Func{}, 64, 104},
{Label{}, 60, 96},
{Builtin{}, 60, 96},
{Nil{}, 56, 88},
{PkgName{}, 56, 96},
{Const{}, 60, 104},
{TypeName{}, 52, 88},
{Var{}, 60, 104},
{Func{}, 60, 104},
{Label{}, 56, 96},
{Builtin{}, 56, 96},
{Nil{}, 52, 88},
// Misc
{Scope{}, 60, 104},

View file

@ -16,7 +16,7 @@ import (
// ident type-checks identifier e and initializes x with the value or type of e.
// If an error occurred, x.mode is set to invalid.
// For the meaning of def, see Checker.definedType, below.
// For the meaning of def, see Checker.declaredType, below.
// If wantType is set, the identifier e is expected to denote a type.
func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType bool) {
x.mode = invalid
@ -149,14 +149,14 @@ func (check *Checker) ident(x *operand, e *syntax.Name, def *TypeName, wantType
// typ type-checks the type expression e and returns its type, or Typ[Invalid].
// The type must not be an (uninstantiated) generic type.
func (check *Checker) typ(e syntax.Expr) Type {
return check.definedType(e, nil)
return check.declaredType(e, nil)
}
// varType type-checks the type expression e and returns its type, or Typ[Invalid].
// The type must not be an (uninstantiated) generic type and it must not be a
// constraint interface.
func (check *Checker) varType(e syntax.Expr) Type {
typ := check.definedType(e, nil)
typ := check.declaredType(e, nil)
check.validVarType(e, typ)
return typ
}
@ -187,11 +187,11 @@ func (check *Checker) validVarType(e syntax.Expr, typ Type) {
}).describef(e, "check var type %s", typ)
}
// definedType is like typ but also accepts a type name def.
// If def != nil, e is the type specification for the type named def, declared
// in a type declaration, and def.typ.underlying will be set to the type of e
// before any components of e are type-checked.
func (check *Checker) definedType(e syntax.Expr, def *TypeName) Type {
// declaredType is like typ but also accepts a type name def.
// If def != nil, e is the type specification for the [Alias] or [Named] type
// named def, and def.typ.fromRHS will be set to the [Type] of e immediately
// after its creation.
func (check *Checker) declaredType(e syntax.Expr, def *TypeName) Type {
typ := check.typInternal(e, def)
assert(isTyped(typ))
if isGeneric(typ) {
@ -230,7 +230,7 @@ func goTypeName(typ Type) string {
}
// typInternal drives type checking of types.
// Must only be called by definedType or genericType.
// Must only be called by declaredType or genericType.
func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) {
if check.conf.Trace {
check.trace(e0.Pos(), "-- type %s", e0)
@ -296,7 +296,7 @@ func (check *Checker) typInternal(e0 syntax.Expr, def *TypeName) (T Type) {
case *syntax.ParenExpr:
// Generic types must be instantiated before they can be used in any form.
// Consequently, generic types cannot be parenthesized.
return check.definedType(e.X, def)
return check.declaredType(e.X, def)
case *syntax.ArrayType:
typ := new(Array)

View file

@ -98,7 +98,6 @@ func defPredeclaredTypes() {
// interface.
{
universeAnyNoAlias = NewTypeName(nopos, nil, "any", &Interface{complete: true, tset: &topTypeSet})
universeAnyNoAlias.setColor(black)
// ensure that the any TypeName reports a consistent Parent, after
// hijacking Universe.Lookup with gotypesalias=0.
universeAnyNoAlias.setParent(Universe)
@ -107,7 +106,6 @@ func defPredeclaredTypes() {
// into the Universe, but we lean toward the future and insert the Alias
// representation.
universeAnyAlias = NewTypeName(nopos, nil, "any", nil)
universeAnyAlias.setColor(black)
_ = NewAlias(universeAnyAlias, universeAnyNoAlias.Type().Underlying()) // Link TypeName and Alias
def(universeAnyAlias)
}
@ -115,7 +113,6 @@ func defPredeclaredTypes() {
// type error interface{ Error() string }
{
obj := NewTypeName(nopos, nil, "error", nil)
obj.setColor(black)
typ := (*Checker)(nil).newNamed(obj, nil, nil)
// error.Error() string
@ -136,7 +133,6 @@ func defPredeclaredTypes() {
// type comparable interface{} // marked as comparable
{
obj := NewTypeName(nopos, nil, "comparable", nil)
obj.setColor(black)
typ := (*Checker)(nil).newNamed(obj, nil, nil)
// interface{} // marked as comparable
@ -165,7 +161,7 @@ func defPredeclaredConsts() {
}
func defPredeclaredNil() {
def(&Nil{object{name: "nil", typ: Typ[UntypedNil], color_: black}})
def(&Nil{object{name: "nil", typ: Typ[UntypedNil]}})
}
// A builtinId is the id of a builtin function.
@ -289,7 +285,7 @@ func init() {
// a scope. Objects with exported names are inserted in the unsafe package
// scope; other objects are inserted in the universe scope.
func def(obj Object) {
assert(obj.color() == black)
assert(obj.Type() != nil)
name := obj.Name()
if strings.Contains(name, " ") {
return // nothing to do

View file

@ -351,6 +351,11 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node {
case ir.OMETHVALUE:
return walkMethodValue(n.(*ir.SelectorExpr), init)
case ir.OMOVE2HEAP:
n := n.(*ir.MoveToHeapExpr)
n.Slice = walkExpr(n.Slice, init)
return n
}
// No return! Each case must return (or panic),

View file

@ -6,12 +6,12 @@ require (
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5
golang.org/x/arch v0.22.1-0.20251016010524-fea4a9ec4938
golang.org/x/build v0.0.0-20250806225920-b7c66c047964
golang.org/x/mod v0.29.0
golang.org/x/sync v0.17.0
golang.org/x/sys v0.37.0
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8
golang.org/x/mod v0.30.1-0.20251114215501-3f03020ad526
golang.org/x/sync v0.18.0
golang.org/x/sys v0.38.0
golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54
golang.org/x/term v0.34.0
golang.org/x/tools v0.38.1-0.20251015192825-7d9453ccc0f5
golang.org/x/tools v0.39.1-0.20251114194111-59ff18ce4883
)
require (

View file

@ -10,19 +10,19 @@ golang.org/x/arch v0.22.1-0.20251016010524-fea4a9ec4938 h1:VJ182b/ajNehMFRltVfCh
golang.org/x/arch v0.22.1-0.20251016010524-fea4a9ec4938/go.mod h1:dNHoOeKiyja7GTvF9NJS1l3Z2yntpQNzgrjh1cU103A=
golang.org/x/build v0.0.0-20250806225920-b7c66c047964 h1:yRs1K51GKq7hsIO+YHJ8LsslrvwFceNPIv0tYjpcBd0=
golang.org/x/build v0.0.0-20250806225920-b7c66c047964/go.mod h1:i9Vx7+aOQUpYJRxSO+OpRStVBCVL/9ccI51xblWm5WY=
golang.org/x/mod v0.29.0 h1:HV8lRxZC4l2cr3Zq1LvtOsi/ThTgWnUk/y64QSs8GwA=
golang.org/x/mod v0.29.0/go.mod h1:NyhrlYXJ2H4eJiRy/WDBO6HMqZQ6q9nk4JzS3NuCK+w=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 h1:LvzTn0GQhWuvKH/kVRS3R3bVAsdQWI7hvfLHGgh9+lU=
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8/go.mod h1:Pi4ztBfryZoJEkyFTI5/Ocsu2jXyDr6iSdgJiYE/uwE=
golang.org/x/mod v0.30.1-0.20251114215501-3f03020ad526 h1:LPpBM4CGUFMC47OqgAr2YIUxEUjH1Ur+D3KR/1LiuuQ=
golang.org/x/mod v0.30.1-0.20251114215501-3f03020ad526/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc=
golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I=
golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc=
golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54 h1:E2/AqCUMZGgd73TQkxUMcMla25GB9i/5HOdLr+uH7Vo=
golang.org/x/telemetry v0.0.0-20251111182119-bc8e575c7b54/go.mod h1:hKdjCMrbv9skySur+Nek8Hd0uJ0GuxJIoIX2payrIdQ=
golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/tools v0.38.1-0.20251015192825-7d9453ccc0f5 h1:cz7f45KGWAtyIrz6bm45Gc+lw8beIxBSW3EQh4Bwbg4=
golang.org/x/tools v0.38.1-0.20251015192825-7d9453ccc0f5/go.mod h1:yEsQ/d/YK8cjh0L6rZlY8tgtlKiBNTL14pGDJPJpYQs=
golang.org/x/tools v0.39.1-0.20251114194111-59ff18ce4883 h1:aeO0AW8d+a+5+hNQx9f4J5egD89zftrY2x42KGQjLzI=
golang.org/x/tools v0.39.1-0.20251114194111-59ff18ce4883/go.mod h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef h1:mqLYrXCXYEZOop9/Dbo6RPX11539nwiCNBb1icVPmw8=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef/go.mod h1:8xcPgWmwlZONN1D9bjxtHEjrUtSEa3fakVF8iaewYKQ=

View file

@ -51,6 +51,9 @@ func stripExperiment(version string) string {
if i := strings.Index(version, " X:"); i >= 0 {
return version[:i]
}
if i := strings.Index(version, "-X:"); i >= 0 {
return version[:i]
}
return version
}
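For illustration, a self-contained sketch of the behavior with hypothetical version strings (stripped is a stand-in for stripExperiment):

	package main

	import (
		"fmt"
		"strings"
	)

	// stripped mirrors stripExperiment: drop everything from the first
	// " X:" or "-X:" separator onward.
	func stripped(v string) string {
		for _, sep := range []string{" X:", "-X:"} {
			if i := strings.Index(v, sep); i >= 0 {
				return v[:i]
			}
		}
		return v
	}

	func main() {
		fmt.Println(stripped("go1.26 X:fieldtrack")) // go1.26
		fmt.Println(stripped("go1.26-X:fieldtrack")) // go1.26 (the newly handled form)
		fmt.Println(stripped("go1.26"))              // go1.26 (no suffix)
	}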

View file

@ -44,7 +44,7 @@ func newScriptEngine() *script.Engine {
return script.OnceCondition(summary, func() (bool, error) { return f(), nil })
}
add("bzr", lazyBool("the 'bzr' executable exists and provides the standard CLI", hasWorkingBzr))
add("git-min-vers", script.PrefixCondition("<suffix> indicates a minimum git version", hasAtLeastGitVersion))
add("git-sha256", script.OnceCondition("the local 'git' version is recent enough to support sha256 object/commit hashes", gitSupportsSHA256))
interrupt := func(cmd *exec.Cmd) error { return cmd.Process.Signal(os.Interrupt) }
gracePeriod := 30 * time.Second // arbitrary
@ -412,10 +412,14 @@ func gitVersion() (string, error) {
return "v" + string(matches[1]), nil
}
func hasAtLeastGitVersion(s *script.State, minVers string) (bool, error) {
func hasAtLeastGitVersion(minVers string) (bool, error) {
gitVers, gitVersErr := gitVersion()
if gitVersErr != nil {
return false, gitVersErr
}
return semver.Compare(minVers, gitVers) <= 0, nil
}
func gitSupportsSHA256() (bool, error) {
return hasAtLeastGitVersion("v2.29")
}

View file

@ -44,7 +44,7 @@ func scriptConditions(t *testing.T) map[string]script.Cond {
add("case-sensitive", script.OnceCondition("$WORK filesystem is case-sensitive", isCaseSensitive))
add("cc", script.PrefixCondition("go env CC = <suffix> (ignoring the go/env file)", ccIs))
add("git", lazyBool("the 'git' executable exists and provides the standard CLI", hasWorkingGit))
add("git-min-vers", script.PrefixCondition("<suffix> indicates a minimum git version", hasAtLeastGitVersion))
add("git-sha256", script.OnceCondition("the local 'git' version is recent enough to support sha256 object/commit hashes", gitSupportsSHA256))
add("net", script.PrefixCondition("can connect to external network host <suffix>", hasNet))
add("trimpath", script.OnceCondition("test binary was built with -trimpath", isTrimpath))
@ -171,7 +171,7 @@ func gitVersion() (string, error) {
return "v" + string(matches[1]), nil
}
func hasAtLeastGitVersion(s *script.State, minVers string) (bool, error) {
func hasAtLeastGitVersion(minVers string) (bool, error) {
gitVers, gitVersErr := gitVersion()
if gitVersErr != nil {
return false, gitVersErr
@ -179,6 +179,10 @@ func hasAtLeastGitVersion(s *script.State, minVers string) (bool, error) {
return semver.Compare(minVers, gitVers) <= 0, nil
}
func gitSupportsSHA256() (bool, error) {
return hasAtLeastGitVersion("v2.29")
}
func hasWorkingBzr() bool {
bzr, err := exec.LookPath("bzr")
if err != nil {

View file

@ -399,8 +399,8 @@ The available conditions are:
GOOS/GOARCH supports -fuzz with instrumentation
[git]
the 'git' executable exists and provides the standard CLI
[git-min-vers:*]
<suffix> indicates a minimum git version
[git-sha256]
the local 'git' version is recent enough to support sha256 object/commit hashes
[go-builder]
GO_BUILDER_NAME is non-empty
[link]

View file

@ -1,6 +1,6 @@
[short] skip
[!git] skip
[!git-min-vers:v2.29] skip
[!git-sha256] skip
env GOPRIVATE=vcs-test.golang.org

View file

@ -1,6 +1,6 @@
[short] skip
[!git] skip
[!git-min-vers:v2.29] skip
[!git-sha256] skip
env GOPRIVATE=vcs-test.golang.org

View file

@ -1,6 +1,6 @@
[short] skip
[!git] skip
[!git-min-vers:v2.29] skip
[!git-sha256] skip
# This is a git sha256-mode copy of mod_download_git_bareRepository

View file

@ -2,14 +2,14 @@
# 'GOPROXY=direct go get golang.org/x/tools/gopls@master' did not correctly
# resolve the pseudo-version for its dependency on golang.org/x/tools.
[!net:cloud.google.com] skip
[short] skip
[!git] skip
env GO111MODULE=on
env GOPROXY=direct
env GOSUMDB=off
go list -m cloud.google.com/go@main
go list -m vcs-test.golang.org/git/tagtests.git@master
! stdout 'v0.0.0-'
-- go.mod --

View file

@ -1,4 +1,4 @@
[!git-min-vers:v2.29] skip
[!git-sha256] skip
handle git

View file

@ -1155,6 +1155,7 @@ type Func interface {
type Link struct {
Headtype objabi.HeadType
Arch *LinkArch
CompressInstructions bool // use compressed instructions where possible (if supported by architecture)
Debugasm int
Debugvlog bool
Debugpcln string

View file

@ -589,6 +589,10 @@ const (
AORN
AANDN
// 2.2.1.12
AMULWVW
AMULWVWU
// 2.2.7. Atomic Memory Access Instructions
AAMSWAPB
AAMSWAPH

View file

@ -131,6 +131,8 @@ var Anames = []string{
"ALSLV",
"ORN",
"ANDN",
"MULWVW",
"MULWVWU",
"AMSWAPB",
"AMSWAPH",
"AMSWAPW",

View file

@ -1503,6 +1503,8 @@ func buildop(ctxt *obj.Link) {
opset(AREMU, r0)
opset(ADIV, r0)
opset(ADIVU, r0)
opset(AMULWVW, r0)
opset(AMULWVWU, r0)
case AMULV:
opset(AMULVU, r0)
@ -3230,6 +3232,10 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
return 0x3c << 15 // mulh.d
case AMULHVU:
return 0x3d << 15 // mulhu.d
case AMULWVW:
return 0x3e << 15 // mulw.d.w
case AMULWVWU:
return 0x3f << 15 // mulw.d.wu
case ADIV:
return 0x40 << 15 // div.w
case ADIVU:

View file

@ -11,8 +11,8 @@ import (
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strings"
"testing"
)
@ -48,10 +48,10 @@ func genLargeBranch(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT f(SB),0,$0-0")
fmt.Fprintln(buf, "BEQ X0, X0, label")
for i := 0; i < 1<<19; i++ {
fmt.Fprintln(buf, "ADD $0, X0, X0")
fmt.Fprintln(buf, "ADD $0, X5, X0")
}
fmt.Fprintln(buf, "label:")
fmt.Fprintln(buf, "ADD $0, X0, X0")
fmt.Fprintln(buf, "ADD $0, X5, X0")
}
// TestLargeCall generates a large function (>1MB of text) with a call to
@ -112,11 +112,11 @@ func genLargeCall(buf *bytes.Buffer) {
fmt.Fprintln(buf, "TEXT ·x(SB),0,$0-0")
fmt.Fprintln(buf, "CALL ·y(SB)")
for i := 0; i < 1<<19; i++ {
fmt.Fprintln(buf, "ADD $0, X0, X0")
fmt.Fprintln(buf, "ADD $0, X5, X0")
}
fmt.Fprintln(buf, "RET")
fmt.Fprintln(buf, "TEXT ·y(SB),0,$0-0")
fmt.Fprintln(buf, "ADD $0, X0, X0")
fmt.Fprintln(buf, "ADD $0, X5, X0")
fmt.Fprintln(buf, "RET")
}
@ -301,9 +301,9 @@ TEXT _stub(SB),$0-0
// FENCE
// NOP
// FENCE
// RET
want := "0f 00 f0 0f 13 00 00 00 0f 00 f0 0f 67 80 00 00"
if !strings.Contains(string(out), want) {
// RET (CJALR or JALR)
want := regexp.MustCompile("0x0000 0f 00 f0 0f 13 00 00 00 0f 00 f0 0f (82 80|67 80 00 00) ")
if !want.Match(out) {
t.Errorf("PCALIGN test failed - got %s\nwant %s", out, want)
}
}

View file

@ -326,6 +326,9 @@ const (
NEED_GOT_PCREL_ITYPE_RELOC
)
const NEED_RELOC = NEED_JAL_RELOC | NEED_CALL_RELOC | NEED_PCREL_ITYPE_RELOC |
NEED_PCREL_STYPE_RELOC | NEED_GOT_PCREL_ITYPE_RELOC
// RISC-V mnemonics, as defined in the "opcodes" and "opcodes-pseudo" files
// at https://github.com/riscv/riscv-opcodes.
//

View file

@ -0,0 +1,297 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package riscv implements the riscv64 assembler.
# Register naming
The integer registers are named X0 through to X31; however, X4 must be accessed
through its RISC-V ABI name, TP, and X27, which holds a pointer to the
goroutine structure, must be referred to as g. Additionally, when building in
shared mode, X3 is unavailable and must be accessed via its RISC-V ABI name,
GP.
The floating-point registers are named F0 through to F31.
The vector registers are named V0 through to V31.
Both integer and floating-point registers can be referred to by their RISC-V
ABI names, e.g., A0 or FT0, with the exception that X27 cannot be referred to
by its RISC-V ABI name, S11. It must be referred to as g.
Some of the integer registers are used by the Go runtime and assembler: X26 is
the closure pointer, X27 points to the goroutine structure, and X31 is a
temporary register used by the Go assembler. Use of X31 should be avoided in
handwritten assembly code, as its value could be altered by the instruction
sequences emitted by the assembler.
# Instruction naming
Many RISC-V instructions contain one or more suffixes in their names. In the
[RISC-V ISA Manual] these suffixes are separated from each other and from the
instruction mnemonic by a dot ('.'). In the Go assembler, the
separators are omitted and the suffixes are written in upper case.
Example:
FMVWX <=> fmv.w.x
# Rounding modes
The Go toolchain does not set the FCSR register and requires the desired
rounding mode to be explicitly encoded within floating-point instructions.
The syntax the Go assembler uses to specify the rounding modes differs
from the syntax in the RISC-V specifications. In the [RISC-V ISA Manual]
the rounding mode is given as an extra operand at the end of an
assembly language instruction. In the Go assembler, the rounding modes are
converted to upper case and follow the instruction mnemonic, separated from
it by a dot ('.').
Example:
FCVTLUS.RNE F0, X5 <=> fcvt.lu.s x5, f0, rne
RTZ is assumed if the rounding mode is omitted.
# RISC-V extensions
By default the Go compiler targets the [rva20u64] profile. This profile mandates
all the general RISC-V instructions, allowing Go to use integer, multiplication,
division, floating-point and atomic instructions without having to
perform compile time or runtime checks to verify that their use is appropriate
for the target hardware. All widely available riscv64 devices support at least
[rva20u64]. The Go toolchain can be instructed to target later RISC-V profiles,
including, [rva22u64] and [rva23u64], via the GORISCV64 environment variable.
Instructions that are provided by newer profiles cannot typically be used in
handwritten assembly code without compile time guards (or runtime checks)
that ensure they are hardware supported.
The file asm_riscv64.h defines macros for each RISC-V extension that is enabled
by setting the GORISCV64 environment variable to a value other than [rva20u64].
For example, if GORISCV64=rva22u64 the macros hasZba, hasZbb and hasZbs will be
defined. If GORISCV64=rva23u64 hasV will be defined in addition to hasZba,
hasZbb and hasZbs. These macros can be used to determine whether it's safe
to use an instruction in handwritten assembly.
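For example, a guard for the Zba SH1ADD instruction might look like the
following sketch (the fallback synthesizes the same result using only
[rva20u64] instructions):

	#include "asm_riscv64.h"

	#ifdef hasZba
		SH1ADD	X10, X11, X12	// X12 = (X11 << 1) + X10
	#else
		SLLI	$1, X11, X12
		ADD	X10, X12, X12
	#endif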
It is not always necessary to include asm_riscv64.h and use #ifdefs in your
code to safely take advantage of instructions present in the [rva22u64]
profile. In some cases the assembler can generate [rva20u64] compatible code
even when an [rva22u64] instruction is used in an assembly source file. When
GORISCV64=rva20u64 the assembler will synthesize certain [rva22u64]
instructions, e.g., ANDN, using multiple [rva20u64] instructions. Instructions
such as ANDN can then be freely used in assembly code without checking to see
whether the instruction is supported by the target profile. When building a
source file containing the ANDN instruction with GORISCV64=rva22u64 the
assembler will emit the Zbb ANDN instruction directly. When building the same
source file with GORISCV64=rva20u64 the assembler will emit multiple [rva20u64]
instructions to synthesize ANDN.
The assembler will also use [rva22u64] instructions to implement the zero and
sign extension instructions, e.g., MOVB and MOVHU, when GORISCV64=rva22u64 or
greater.
The instructions not implemented in the default profile ([rva20u64]) that can
be safely used in assembly code without compile time checks are:
- ANDN
- MAX
- MAXU
- MIN
- MINU
- MOVB
- MOVH
- MOVHU
- MOVWU
- ORN
- ROL
- ROLW
- ROR
- RORI
- RORIW
- RORW
- XNOR
# Operand ordering
The ordering used for instruction operands in the Go assembler differs from the
ordering defined in the [RISC-V ISA Manual].
1. R-Type instructions
R-Type instructions are written in the reverse order to that given in the
[RISC-V ISA Manual], with the register order being rs2, rs1, rd.
Examples:
ADD X10, X11, X12 <=> add x12, x11, x10
FADDD F10, F11, F12 <=> fadd.d f12, f11, f10
2. I-Type arithmetic instructions
I-Type arithmetic instructions (not loads, fences, ebreak, ecall) use the same
ordering as the R-Type instructions, typically imm12, rs1, rd.
Examples:
ADDI $1, X11, X12 <=> addi x12, x11, 1
SLTI $1, X11, X12 <=> slti x12, x11, 1
3. Loads and Stores
Load and store instructions are written with the source operand (whether a
register or a memory address) first, followed by the destination operand.
Examples:
MOV 16(X2), X10 <=> ld x10, 16(x2)
MOV X10, (X2) <=> sd x10, 0(x2)
4. Branch instructions
The branch instructions use the same operand ordering as is given in the
[RISC-V ISA Manual], e.g., rs1, rs2, label.
Example:
BLT X12, X23, loop1 <=> blt x12, x23, loop1
BLT X12, X23, label will jump to label if X12 < X23. Note this is not the
same ordering as is used for the SLT instructions.
5. FMA instructions
The Go assembler uses a different ordering for the RISC-V FMA operands from
the ordering given in the [RISC-V ISA Manual]. The operands are rotated one
place to the left, so that the destination operand comes last.
Example:
FMADDS F1, F2, F3, F4 <=> fmadd.s f4, f1, f2, f3
6. AMO instructions
The ordering used for the AMO operations is rs2, rs1, rd, i.e., the operands
as specified in the [RISC-V ISA Manual] are rotated one place to the left.
Example:
AMOSWAPW X5, (X6), X7 <=> amoswap.w x7, x5, (x6)
7. Vector instructions
The VSETVLI instruction uses the same symbolic names as the [RISC-V ISA Manual]
to represent the components of vtype, with the exception
that they are written in upper case. The ordering of the operands in the Go
assembler differs from the [RISC-V ISA Manual] in that the operands are
rotated one place to the left so that the destination register, the register
that holds the new vl, is the last operand.
Example:
VSETVLI X10, E8, M1, TU, MU, X12 <=> vsetvli x12, x10, e8, m1, tu, mu
Vector load and store instructions follow the pattern set by scalar loads and
stores, i.e., the source is always the first operand and the destination the
last. However, the ordering of the operands of these instructions is
complicated by the optional mask register and, in some cases, the use of an
additional stride or index register. In the Go assembler the index and stride
registers appear as the second operand in indexed or strided loads and stores,
while the mask register, if present, is always the penultimate operand.
Examples:
VLE8V (X10), V3 <=> vle8.v v3, (x10)
VSE8V V3, (X10) <=> vse8.v v3, (x10)
VLE8V (X10), V0, V3 <=> vle8.v v3, (x10), v0.t
VSE8V V3, V0, (X10) <=> vse8.v v3, (x10), v0.t
VLSE8V (X10), X11, V3 <=> vlse8.v v3, (x10), x11
VSSE8V V3, X11, (X10) <=> vsse8.v v3, (x10), x11
VLSE8V (X10), X11, V0, V3 <=> vlse8.v v3, (x10), x11, v0.t
VSSE8V V3, X11, V0, (X10) <=> vsse8.v v3, (x10), x11, v0.t
VLUXEI8V (X10), V2, V3 <=> vluxei8.v v3, (x10), v2
VSUXEI8V V3, V2, (X10) <=> vsuxei8.v v3, (x10), v2
VLUXEI8V (X10), V2, V0, V3 <=> vluxei8.v v3, (x10), v2, v0.t
VSUXEI8V V3, V2, V0, (X10) <=> vsuxei8.v v3, (x10), v2, v0.t
VL1RE8V (X10), V3 <=> vl1re8.v v3, (x10)
VS1RV V3, (X11) <=> vs1r.v v3, (x11)
The ordering of operands for two- and three-argument vector arithmetic instructions is
reversed in the Go assembler.
Examples:
VMVVV V2, V3 <=> vmv.v.v v3, v2
VADDVV V1, V2, V3 <=> vadd.vv v3, v2, v1
VADDVX X10, V2, V3 <=> vadd.vx v3, v2, x10
VMADCVI $15, V2, V3 <=> vmadc.vi v3, v2, 15
The mask register, when specified, is always the penultimate operand in a vector
arithmetic instruction, appearing before the destination register.
Examples:
VANDVV V1, V2, V0, V3 <=> vand.vv v3, v2, v1, v0.t
# Ternary instructions
The Go assembler allows the second operand to be omitted from most ternary
instructions if it matches the third (destination) operand.
Examples:
ADD X10, X12, X12 <=> ADD X10, X12
ANDI $3, X12, X12 <=> ANDI $3, X12
The use of this abbreviated syntax is encouraged.
# Ordering of atomic instructions
It is not possible to specify the ordering bits in the FENCE, LR, SC or AMO
instructions. The FENCE instruction is always emitted as a full fence; the
acquire and release bits are always set for the AMO instructions; the acquire
bit is always set for the LR instructions; and the release bit is always set
for the SC instructions.
# Immediate operands
In many cases, where an R-Type instruction has a corresponding I-Type
instruction, the R-Type mnemonic can be used in place of the I-Type mnemonic.
The assembler assumes that the immediate form of the instruction was intended
when the first operand is given as an immediate value rather than a register.
Example:
AND $3, X12, X13 <=> ANDI $3, X12, X13
# Integer constant materialization
The MOV instruction can be used to set a register to the value of any 64 bit
constant literal. The way this is achieved by the assembler varies depending
on the value of the constant. Where possible the assembler will synthesize the
constant using one or more RISC-V arithmetic instructions. If it is unable
to easily materialize the constant it will load the 64 bit literal from memory.
A 32 bit constant literal can be specified as an argument to ADDI, ANDI, ORI and
XORI. If the specified literal does not fit into 12 bits the assembler will
generate extra instructions to synthesize it.
Integer constants provided as operands to all other instructions must fit into
the number of bits allowed by the instructions' encodings for immediate values.
Otherwise, an error will be generated.
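For example, materializing an arbitrary 64 bit constant (the emitted
instruction sequence depends on the constant's value):

	MOV	$0x123456789abcdef0, X10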
# Floating point constant materialization
The MOVF and MOVD instructions can be used to set a register to the value
of any 32 bit or 64 bit floating point constant literal, respectively. Unless
the constant literal is 0.0, MOVF and MOVD will be encoded as FLW and FLD
instructions that load the constant from a location within the program's
binary.
[RISC-V ISA Manual]: https://github.com/riscv/riscv-isa-manual
[rva20u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#51-rva20u64-profile
[rva22u64]: https://github.com/riscv/riscv-profiles/blob/main/src/profiles.adoc#rva22u64-profile
[rva23u64]: https://github.com/riscv/riscv-profiles/blob/main/src/rva23-profile.adoc#rva23u64-profile
*/
package riscv

View file

@ -414,10 +414,10 @@ func containsCall(sym *obj.LSym) bool {
// setPCs sets the Pc field in all instructions reachable from p.
// It uses pc as the initial value and returns the next available pc.
func setPCs(p *obj.Prog, pc int64) int64 {
func setPCs(p *obj.Prog, pc int64, compress bool) int64 {
for ; p != nil; p = p.Link {
p.Pc = pc
for _, ins := range instructionsForProg(p) {
for _, ins := range instructionsForProg(p, compress) {
pc += int64(ins.length())
}
@ -671,7 +671,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// a fixed point will be reached). No attempt to handle functions > 2GiB.
for {
big, rescan := false, false
maxPC := setPCs(cursym.Func().Text, 0)
maxPC := setPCs(cursym.Func().Text, 0, ctxt.CompressInstructions)
if maxPC+maxTrampSize > (1 << 20) {
big = true
}
@ -801,7 +801,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
// Validate all instructions - this provides nice error messages.
for p := cursym.Func().Text; p != nil; p = p.Link {
for _, ins := range instructionsForProg(p) {
for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
ins.validate(ctxt)
}
}
@@ -1141,6 +1141,14 @@ func wantImmU(ctxt *obj.Link, ins *instruction, imm int64, nbits uint) {
}
}
// isScaledImmI reports whether imm fits in nbits bits as a signed
// immediate and is a multiple of scale.
func isScaledImmI(imm int64, nbits uint, scale int64) bool {
return immFits(imm, nbits, true) == nil && imm%scale == 0
}
// isScaledImmU reports whether imm fits in nbits bits as an unsigned
// immediate and is a multiple of scale.
func isScaledImmU(imm int64, nbits uint, scale int64) bool {
return immFits(imm, nbits, false) == nil && imm%scale == 0
}
func wantScaledImm(ctxt *obj.Link, ins *instruction, imm int64, nbits uint, scale int64, signed bool) {
if err := immFits(imm, nbits, signed); err != nil {
ctxt.Diag("%v: %v", ins, err)
@@ -1180,6 +1188,10 @@ func wantIntReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "integer", r, REG_X0, REG_X31)
}
// isIntPrimeReg reports whether r is one of the integer registers
// (X8-X15) encodable in the 3-bit prime register fields of compressed
// instructions.
func isIntPrimeReg(r uint32) bool {
return r >= REG_X8 && r <= REG_X15
}
// wantIntPrimeReg checks that r is an integer register that can be used
// in a prime register field of a compressed instruction.
func wantIntPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@@ -1191,6 +1203,10 @@ func wantFloatReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
wantReg(ctxt, ins, pos, "float", r, REG_F0, REG_F31)
}
// isFloatPrimeReg reports whether r is one of the floating-point
// registers (F8-F15) encodable in the 3-bit prime register fields of
// compressed instructions.
func isFloatPrimeReg(r uint32) bool {
return r >= REG_F8 && r <= REG_F15
}
// wantFloatPrimeReg checks that r is a floating-point register that can
// be used in a prime register field of a compressed instruction.
func wantFloatPrimeReg(ctxt *obj.Link, ins *instruction, pos string, r uint32) {
@@ -3515,6 +3531,147 @@ func (ins *instruction) usesRegTmp() bool {
return ins.rd == REG_TMP || ins.rs1 == REG_TMP || ins.rs2 == REG_TMP
}
// compress rewrites ins in place to an equivalent compressed (RVC)
// instruction when one exists and its operands satisfy the encoding
// constraints; otherwise ins is left unchanged.
func (ins *instruction) compress() {
switch ins.as {
case ALW:
if ins.rd != REG_X0 && ins.rs1 == REG_SP && isScaledImmU(ins.imm, 8, 4) {
ins.as, ins.rs1, ins.rs2 = ACLWSP, obj.REG_NONE, ins.rs1
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
ins.as = ACLW
}
case ALD:
if ins.rs1 == REG_SP && ins.rd != REG_X0 && isScaledImmU(ins.imm, 9, 8) {
ins.as, ins.rs1, ins.rs2 = ACLDSP, obj.REG_NONE, ins.rs1
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
ins.as = ACLD
}
case AFLD:
if ins.rs1 == REG_SP && isScaledImmU(ins.imm, 9, 8) {
ins.as, ins.rs1, ins.rs2 = ACFLDSP, obj.REG_NONE, ins.rs1
} else if isFloatPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
ins.as = ACFLD
}
case ASW:
if ins.rd == REG_SP && isScaledImmU(ins.imm, 8, 4) {
ins.as, ins.rs1, ins.rs2 = ACSWSP, obj.REG_NONE, ins.rs1
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 7, 4) {
ins.as, ins.rd, ins.rs1, ins.rs2 = ACSW, obj.REG_NONE, ins.rd, ins.rs1
}
case ASD:
if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
ins.as, ins.rs1, ins.rs2 = ACSDSP, obj.REG_NONE, ins.rs1
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
ins.as, ins.rd, ins.rs1, ins.rs2 = ACSD, obj.REG_NONE, ins.rd, ins.rs1
}
case AFSD:
if ins.rd == REG_SP && isScaledImmU(ins.imm, 9, 8) {
ins.as, ins.rs1, ins.rs2 = ACFSDSP, obj.REG_NONE, ins.rs1
} else if isIntPrimeReg(ins.rd) && isFloatPrimeReg(ins.rs1) && isScaledImmU(ins.imm, 8, 8) {
ins.as, ins.rd, ins.rs1, ins.rs2 = ACFSD, obj.REG_NONE, ins.rd, ins.rs1
}
case AADDI:
if ins.rd == REG_SP && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmI(ins.imm, 10, 16) {
ins.as = ACADDI16SP
} else if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
ins.as = ACADDI
} else if isIntPrimeReg(ins.rd) && ins.rs1 == REG_SP && ins.imm != 0 && isScaledImmU(ins.imm, 10, 4) {
ins.as = ACADDI4SPN
} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && immIFits(ins.imm, 6) == nil {
ins.as, ins.rs1 = ACLI, obj.REG_NONE
} else if ins.rd != REG_X0 && ins.rs1 != REG_X0 && ins.imm == 0 {
ins.as, ins.rs1, ins.rs2 = ACMV, obj.REG_NONE, ins.rs1
} else if ins.rd == REG_X0 && ins.rs1 == REG_X0 && ins.imm == 0 {
ins.as, ins.rs1 = ACNOP, ins.rd
}
case AADDIW:
if ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
ins.as = ACADDIW
}
case ALUI:
if ins.rd != REG_X0 && ins.rd != REG_SP && ins.imm != 0 && immIFits(ins.imm, 6) == nil {
ins.as = ACLUI
}
case ASLLI:
if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.imm != 0 {
ins.as = ACSLLI
}
case ASRLI:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
ins.as = ACSRLI
}
case ASRAI:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && ins.imm != 0 {
ins.as = ACSRAI
}
case AANDI:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && immIFits(ins.imm, 6) == nil {
ins.as = ACANDI
}
case AADD:
if ins.rd != REG_X0 && ins.rd == ins.rs1 && ins.rs2 != REG_X0 {
ins.as = ACADD
} else if ins.rd != REG_X0 && ins.rd == ins.rs2 && ins.rs1 != REG_X0 {
ins.as, ins.rs1, ins.rs2 = ACADD, ins.rs2, ins.rs1
} else if ins.rd != REG_X0 && ins.rs1 == REG_X0 && ins.rs2 != REG_X0 {
ins.as = ACMV
}
case AADDW:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACADDW
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
ins.as, ins.rs1, ins.rs2 = ACADDW, ins.rs2, ins.rs1
}
case ASUB:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACSUB
}
case ASUBW:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACSUBW
}
case AAND:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACAND
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
ins.as, ins.rs1, ins.rs2 = ACAND, ins.rs2, ins.rs1
}
case AOR:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACOR
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
ins.as, ins.rs1, ins.rs2 = ACOR, ins.rs2, ins.rs1
}
case AXOR:
if isIntPrimeReg(ins.rd) && ins.rd == ins.rs1 && isIntPrimeReg(ins.rs2) {
ins.as = ACXOR
} else if isIntPrimeReg(ins.rd) && isIntPrimeReg(ins.rs1) && ins.rd == ins.rs2 {
ins.as, ins.rs1, ins.rs2 = ACXOR, ins.rs2, ins.rs1
}
case AEBREAK:
ins.as, ins.rd, ins.rs1 = ACEBREAK, obj.REG_NONE, obj.REG_NONE
}
}
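To make the constraints above concrete, here is a minimal, self-contained
sketch of the immediate checks the pass performs; fitsUnsigned and
scaledImmU are simplified stand-ins for the package's immFits and
isScaledImmU, not its actual code:

	package main

	import "fmt"

	// fitsUnsigned reports whether imm is representable as an unsigned
	// nbits-wide immediate (a simplified stand-in for immFits).
	func fitsUnsigned(imm int64, nbits uint) bool {
		return imm >= 0 && imm < 1<<nbits
	}

	// scaledImmU mirrors isScaledImmU: the immediate must fit in nbits
	// bits unsigned and be a multiple of scale, as the compressed
	// load/store encodings require (C.LWSP scales by 4, C.LDSP by 8).
	func scaledImmU(imm int64, nbits uint, scale int64) bool {
		return fitsUnsigned(imm, nbits) && imm%scale == 0
	}

	func main() {
		fmt.Println(scaledImmU(252, 8, 4)) // true: LW 252(SP) can become C.LWSP
		fmt.Println(scaledImmU(254, 8, 4)) // false: offset not 4-byte aligned
		fmt.Println(scaledImmU(256, 8, 4)) // false: does not fit in 8 bits
	}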
// instructionForProg returns the default *obj.Prog to instruction mapping.
func instructionForProg(p *obj.Prog) *instruction {
ins := &instruction{
@@ -4057,7 +4214,7 @@ func instructionsForMinMax(p *obj.Prog, ins *instruction) []*instruction {
}
// instructionsForProg returns the machine instructions for an *obj.Prog.
func instructionsForProg(p *obj.Prog) []*instruction {
func instructionsForProg(p *obj.Prog, compress bool) []*instruction {
ins := instructionForProg(p)
inss := []*instruction{ins}
@@ -4710,6 +4867,15 @@ func instructionsForProg(p *obj.Prog) []*instruction {
ins.rs1, ins.rs2 = obj.REG_NONE, REG_V0
}
// Only compress instructions when there is no relocation, since
// relocation relies on knowledge about the exact instructions that
// are in use.
if compress && p.Mark&NEED_RELOC == 0 {
for _, ins := range inss {
ins.compress()
}
}
for _, ins := range inss {
ins.p = p
}
@@ -4799,15 +4965,22 @@ func assemble(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
v := pcAlignPadLength(p.Pc, alignedValue)
offset := p.Pc
for ; v >= 4; v -= 4 {
// NOP
cursym.WriteBytes(ctxt, offset, []byte{0x13, 0, 0, 0})
// NOP (ADDI $0, X0, X0)
cursym.WriteBytes(ctxt, offset, []byte{0x13, 0x00, 0x00, 0x00})
offset += 4
}
if v == 2 {
// CNOP
cursym.WriteBytes(ctxt, offset, []byte{0x01, 0x00})
offset += 2
} else if v != 0 {
ctxt.Diag("bad PCALIGN pad length")
}
continue
}
offset := p.Pc
for _, ins := range instructionsForProg(p) {
for _, ins := range instructionsForProg(p, ctxt.CompressInstructions) {
if ic, err := ins.encode(); err == nil {
cursym.WriteInt(ctxt, offset, ins.length(), int64(ic))
offset += int64(ins.length())
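A rough sketch of the padding logic above, as a stand-alone helper
(padBytes is hypothetical; the real code writes straight into the symbol
via WriteBytes):

	package main

	import "fmt"

	// padBytes emits the 4-byte NOP encoding ADDI $0, X0, X0
	// (0x00000013) while at least four bytes of padding remain, then a
	// single 2-byte C.NOP (0x0001) if exactly two bytes are left over.
	func padBytes(v int64) ([]byte, error) {
		var out []byte
		for ; v >= 4; v -= 4 {
			out = append(out, 0x13, 0x00, 0x00, 0x00)
		}
		switch v {
		case 0:
		case 2:
			out = append(out, 0x01, 0x00)
		default:
			return nil, fmt.Errorf("bad PCALIGN pad length %d", v)
		}
		return out, nil
	}

	func main() {
		b, err := padBytes(10) // two 4-byte NOPs plus one C.NOP
		fmt.Println(b, err)
	}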


@@ -423,9 +423,13 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
q.From.Reg = reg
}
}
if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
from3 := p.GetFrom3()
for i := range p.RestArgs {
a := &p.RestArgs[i].Addr
if a != from3 && a.Name == obj.NAME_EXTERN && !a.Sym.Local() {
ctxt.Diag("don't know how to handle %v with -dynlink", p)
}
}
var source *obj.Addr
// MOVx sym, Ry becomes $MOV sym@GOT, R15; MOVx (R15), Ry
// MOVx Ry, sym becomes $MOV sym@GOT, R15; MOVx Ry, (R15)
@@ -434,9 +438,17 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
ctxt.Diag("cannot handle NAME_EXTERN on both sides in %v with -dynlink", p)
}
if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
}
source = &p.From
} else if p.To.Name == obj.NAME_EXTERN && !p.To.Sym.Local() {
if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
ctxt.Diag("cannot handle NAME_EXTERN on multiple operands in %v with -dynlink", p)
}
source = &p.To
} else if from3 != nil && from3.Name == obj.NAME_EXTERN && !from3.Sym.Local() {
source = from3
} else {
return
}
@@ -501,9 +513,7 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
p2.As = p.As
p2.From = p.From
p2.To = p.To
if from3 := p.GetFrom3(); from3 != nil {
p2.AddRestSource(*from3)
}
p2.RestArgs = p.RestArgs
if p.From.Name == obj.NAME_EXTERN {
p2.From.Reg = reg
p2.From.Name = obj.NAME_NONE
@@ -512,6 +522,11 @@ func rewriteToUseGot(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc) {
p2.To.Reg = reg
p2.To.Name = obj.NAME_NONE
p2.To.Sym = nil
} else if p.GetFrom3() != nil && p.GetFrom3().Name == obj.NAME_EXTERN {
from3 = p2.GetFrom3()
from3.Reg = reg
from3.Name = obj.NAME_NONE
from3.Sym = nil
} else {
return
}
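The shape of the operand-selection logic above, as a minimal sketch (addr
and chooseSource are hypothetical stand-ins for obj.Addr and the inline
logic; the real code also handles local symbols and RestArgs):

	package main

	import (
		"errors"
		"fmt"
	)

	// addr is a pared-down stand-in for obj.Addr; extern marks an
	// operand naming a non-local external symbol (NAME_EXTERN).
	type addr struct {
		extern bool
		name   string
	}

	// chooseSource picks the single external operand (From, To, or
	// From3) to rewrite through the GOT; a second external operand is
	// an error, since the rewrite has only one scratch register.
	func chooseSource(from, to, from3 *addr) (*addr, error) {
		var picked *addr
		for _, a := range []*addr{from, to, from3} {
			if a == nil || !a.extern {
				continue
			}
			if picked != nil {
				return nil, errors.New("cannot handle NAME_EXTERN on multiple operands")
			}
			picked = a
		}
		return picked, nil // picked may be nil: nothing to rewrite
	}

	func main() {
		src, err := chooseSource(&addr{extern: true, name: "sym"}, &addr{}, nil)
		fmt.Println(src.name, err)
	}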


@@ -236,7 +236,7 @@ var ArchRISCV64 = &Arch{
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
MinLC: 2,
Alignment: 8, // riscv unaligned loads work, but are really slow (trap + simulated by OS)
CanMergeLoads: false,
HasLR: true,


@@ -2507,19 +2507,19 @@ func dwarfcompress(ctxt *Link) {
var prevSect *sym.Section
for _, si := range dwarfp {
for _, s := range si.syms {
ldr.SetSymValue(s, int64(pos))
sect := ldr.SymSect(s)
if sect != prevSect {
if ctxt.IsWindows() {
pos = uint64(Rnd(int64(pos), PEFILEALIGN))
}
sect.Vaddr = pos
prevSect = sect
}
ldr.SetSymValue(s, int64(pos))
if ldr.SubSym(s) != 0 {
log.Fatalf("%s: unexpected sub-symbols", ldr.SymName(s))
}
pos += uint64(ldr.SymSize(s))
if ctxt.IsWindows() {
pos = uint64(Rnd(int64(pos), PEFILEALIGN))
}
}
}
Segdwarf.Length = pos - Segdwarf.Vaddr
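The fix rounds each new section's start address up to the PE file
alignment before placing its symbols, rather than rounding after every
symbol. A minimal sketch of the rounding involved (rnd stands in for the
linker's Rnd; 0x200 is used here as an illustrative file alignment):

	package main

	import "fmt"

	// rnd rounds v up to the next multiple of align, which is assumed
	// to be a power of two.
	func rnd(v, align int64) int64 {
		if align <= 0 {
			return v
		}
		return (v + align - 1) &^ (align - 1)
	}

	func main() {
		const pefilealign = 0x200 // illustrative; see PEFILEALIGN in cmd/link
		fmt.Printf("%#x\n", rnd(0x1234, pefilealign)) // 0x1400
	}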


@@ -387,7 +387,7 @@ func TestRISCVTrampolines(t *testing.T) {
buf := new(bytes.Buffer)
fmt.Fprintf(buf, "TEXT a(SB),$0-0\n")
for i := 0; i < 1<<17; i++ {
fmt.Fprintf(buf, "\tADD $0, X0, X0\n")
fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
}
fmt.Fprintf(buf, "\tCALL b(SB)\n")
fmt.Fprintf(buf, "\tRET\n")
@@ -398,7 +398,7 @@ func TestRISCVTrampolines(t *testing.T) {
fmt.Fprintf(buf, "\tRET\n")
fmt.Fprintf(buf, "TEXT ·d(SB),0,$0-0\n")
for i := 0; i < 1<<17; i++ {
fmt.Fprintf(buf, "\tADD $0, X0, X0\n")
fmt.Fprintf(buf, "\tADD $0, X5, X0\n")
}
fmt.Fprintf(buf, "\tCALL a(SB)\n")
fmt.Fprintf(buf, "\tCALL c(SB)\n")
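A note on the change above, inferred from the compression rules earlier in
this diff rather than stated here: the trampoline test relies on each
filler instruction occupying exactly 4 bytes.
ADD $0, X0, X0 // lowers to ADDI $0, X0, X0, now compressible to the 2-byte C.NOP
ADD $0, X5, X0 // lowers to an ADDI with rs1 = X5 and rd = X0, matching no compressed form
Switching the source register to X5 therefore keeps the function sizes
that the test's trampoline expectations depend on.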


@@ -188,7 +188,11 @@ func Main(arch *sys.Arch, theArch Arch) {
buildVersion := buildcfg.Version
if goexperiment := buildcfg.Experiment.String(); goexperiment != "" {
buildVersion += " X:" + goexperiment
sep := " "
if !strings.Contains(buildVersion, "-") { // See go.dev/issue/75953.
sep = "-"
}
buildVersion += sep + "X:" + goexperiment
}
addstrdata1(ctxt, "runtime.buildVersion="+buildVersion)
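A minimal sketch of the separator logic above (withExperiment is a
hypothetical extraction; the experiment name and version strings in main
are illustrative only):

	package main

	import (
		"fmt"
		"strings"
	)

	// withExperiment appends the experiment list with "-" unless the
	// version already contains a "-" (as devel versions do), in which
	// case the old space separator is kept. See go.dev/issue/75953.
	func withExperiment(buildVersion, goexperiment string) string {
		if goexperiment == "" {
			return buildVersion
		}
		sep := " "
		if !strings.Contains(buildVersion, "-") {
			sep = "-"
		}
		return buildVersion + sep + "X:" + goexperiment
	}

	func main() {
		fmt.Println(withExperiment("go1.26", "boringcrypto"))                  // go1.26-X:boringcrypto
		fmt.Println(withExperiment("devel go1.26-ca37d24e0b", "boringcrypto")) // space separator
	}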


@@ -2464,10 +2464,11 @@ var blockedLinknames = map[string][]string{
// Experimental features
"runtime.goroutineLeakGC": {"runtime/pprof"},
"runtime.goroutineleakcount": {"runtime/pprof"},
"runtime.freegc": {}, // disallow all packages
// Others
"net.newWindowsFile": {"net"}, // pushed from os
"testing/synctest.testingSynctestTest": {"testing/synctest"}, // pushed from testing
"runtime.addmoduledata": {}, // disallow all package
"runtime.addmoduledata": {}, // disallow all packages
}
// check if a linkname reference to symbol s from pkg is allowed


@@ -1616,6 +1616,7 @@ func TestCheckLinkname(t *testing.T) {
// pull linkname of a builtin symbol is not ok
{"builtin.go", false},
{"addmoduledata.go", false},
{"freegc.go", false},
// legacy bad linkname is ok, for now
{"fastrand.go", true},
{"badlinkname.go", true},


@@ -0,0 +1,18 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Linkname runtime.freegc is not allowed.
package main
import (
_ "unsafe"
)
//go:linkname freegc runtime.freegc
func freegc()
func main() {
freegc()
}


@@ -143,6 +143,13 @@ func (g *procGenerator) ProcTransition(ctx *traceContext, ev *trace.Event) {
viewerEv := traceviewer.InstantEvent{
Resource: uint64(proc),
Stack: ctx.Stack(viewerFrames(ev.Stack())),
// Annotate with the thread and proc. The proc is redundant, but this is to
// stay consistent with the thread view, where it's useful information.
Arg: format.SchedCtxArg{
ProcID: uint64(st.Resource.Proc()),
ThreadID: uint64(ev.Thread()),
},
}
from, to := st.Proc()
@@ -156,7 +163,6 @@ func (g *procGenerator) ProcTransition(ctx *traceContext, ev *trace.Event) {
start = ctx.startTime
}
viewerEv.Name = "proc start"
viewerEv.Arg = format.ThreadIDArg{ThreadID: uint64(ev.Thread())}
viewerEv.Ts = ctx.elapsed(start)
ctx.IncThreadStateCount(ctx.elapsed(start), traceviewer.ThreadStateRunning, 1)
}
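For reference, a minimal sketch of the annotation shape the proc and
thread generators now share; schedCtxArg and its JSON tags are
assumptions, not the actual definition of format.SchedCtxArg:

	package main

	import (
		"encoding/json"
		"fmt"
	)

	// schedCtxArg is a hypothetical stand-in for format.SchedCtxArg:
	// both views attach the proc ID and thread ID to the instant event.
	type schedCtxArg struct {
		ProcID   uint64 `json:"proc"`
		ThreadID uint64 `json:"thread"`
	}

	func main() {
		b, _ := json.Marshal(schedCtxArg{ProcID: 3, ThreadID: 42})
		fmt.Println(string(b)) // {"proc":3,"thread":42}
	}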


@@ -138,14 +138,17 @@ func (g *threadGenerator) ProcTransition(ctx *traceContext, ev *trace.Event) {
}
}
type procArg struct {
Proc uint64 `json:"proc,omitempty"`
}
st := ev.StateTransition()
viewerEv := traceviewer.InstantEvent{
Resource: uint64(ev.Thread()),
Stack: ctx.Stack(viewerFrames(ev.Stack())),
Arg: procArg{Proc: uint64(st.Resource.Proc())},
// Annotate with the thread and proc. The thread is redundant, but this is to
// stay consistent with the proc view.
Arg: format.SchedCtxArg{
ProcID: uint64(st.Resource.Proc()),
ThreadID: uint64(ev.Thread()),
},
}
from, to := st.Proc()
@@ -159,7 +162,6 @@ func (g *threadGenerator) ProcTransition(ctx *traceContext, ev *trace.Event) {
start = ctx.startTime
}
viewerEv.Name = "proc start"
viewerEv.Arg = format.ThreadIDArg{ThreadID: uint64(ev.Thread())}
viewerEv.Ts = ctx.elapsed(start)
// TODO(mknyszek): We don't have a state machine for threads, so approximate
// running threads with running Ps.


@@ -33,7 +33,7 @@ type printer struct {
}
// printf prints to the buffer.
func (p *printer) printf(format string, args ...interface{}) {
func (p *printer) printf(format string, args ...any) {
fmt.Fprintf(p, format, args...)
}

Some files were not shown because too many files have changed in this diff.