mirror of
https://github.com/golang/go.git
[dev.simd] all: merge master (d70ad4e) into dev.simd
Conflicts:
- src/cmd/compile/internal/types2/stdlib_test.go
- src/go/types/stdlib_test.go

Merge List:
+ 2025-09-25 d70ad4e740 sync/atomic: correct Uintptr.Or return doc
+ 2025-09-25 d7abfe4f0d runtime: acquire/release C TSAN lock when calling cgo symbolizer/tracebacker
+ 2025-09-25 393d91aea0 cmd/fix: remove all functionality
+ 2025-09-25 6dceff8bad cmd/link: handle -w flag in external linking mode
+ 2025-09-25 76d088eb74 cmd/internal/obj/riscv: remove ACFLWSP/ACFSWSP and ACFLW/ACFSW
+ 2025-09-25 5225e9dc49 doc/next: document new image/jpeg DCT in release notes
+ 2025-09-25 81a83bba21 cmd: update x/tools@4df13e3
+ 2025-09-25 6b32c613ca go/types: make typeset return an iterator
+ 2025-09-25 fbba930271 image/jpeg: replace fdct.go and idct.go with new implementation in dct.go
+ 2025-09-25 92e093467f image/jpeg: correct and test reference slowFDCT and slowIDCT
+ 2025-09-25 27c7bbc51c image/jpeg: prepare for new FDCT/IDCT implementations
+ 2025-09-24 f15cd63ec4 cmd/compile: don't rely on loop info when there are irreducible loops
+ 2025-09-24 371c1d2fcb cmd/internal/obj/riscv: add support for vector unit-stride fault-only-first load instructions
+ 2025-09-23 411c250d64 runtime: add specialized malloc functions for sizes up to 512 bytes
+ 2025-09-23 d7a38adf4c runtime: eliminate global span queue [green tea]
+ 2025-09-23 7bc1935db5 cmd/compile/internal: support new(expr)
+ 2025-09-23 eb78f13c9f doc/go_spec.html: document new(expr)
+ 2025-09-23 74cc463f9e go/token: add TestRemovedFileFileReturnsNil test
+ 2025-09-23 902dc27ae9 go/token: clear cache after grabbing the mutex in RemoveFile
+ 2025-09-23 a13d085a5b cmd/cgo: don't hardcode section name in TestNumberOfExportedFunctions
+ 2025-09-23 61bf26a9ee cmd/link: fix Macho-O X86_64_RELOC_SUBTRACTOR in internal linking
+ 2025-09-23 4b787c8c2b reflect: remove stale comment in unpackEface
+ 2025-09-23 3df27cd21a cmd/compile: fix typo in comment
+ 2025-09-23 684e8d3363 reflect: allocate memory in TypeAssert[I] only when the assertion succeeds
+ 2025-09-23 a5866ebe40 cmd/compile: prevent shapifying of pointer shape type
+ 2025-09-23 a27261c42f go/types,types2: allow new(expr)
+ 2025-09-23 e93f439ac4 runtime/cgo: retry when CreateThread fails with ERROR_ACCESS_DENIED
+ 2025-09-23 69e74b0aac runtime: deduplicate pMask resize code
+ 2025-09-23 fde10c4ce7 runtime: split gcMarkWorkAvailable into two separate conditions
+ 2025-09-23 5d040df092 runtime: use scan kernels in scanSpan [green tea]
+ 2025-09-23 7e0251bf58 runtime: don't report non-blocked goroutines as "(durable)" in stacks
+ 2025-09-23 22ac328856 cmd/link: make -w behavior consistent on Windows

Change-Id: Id76b5a30a3b6f6669437f97e3320c9bca65a1e96
commit a693ae1e9a

114 changed files with 13651 additions and 4083 deletions
@@ -7806,12 +7806,32 @@ min(x, y, z) == min(min(x, y), z)
<h3 id="Allocation">Allocation</h3>

<p>
The built-in function <code>new</code> takes a type <code>T</code>,
allocates storage for a <a href="#Variables">variable</a> of that type
at run time, and returns a value of type <code>*T</code>
<a href="#Pointer_types">pointing</a> to it.
The variable is initialized as described in the section on
<a href="#The_zero_value">initial values</a>.
The built-in function <code>new</code> creates a new, initialized
<a href="#Variables">variable</a> and returns
a <a href="#Pointer_types">pointer</a> to it.

It accepts a single argument, which may be either an expression or a type.
</p>
<p>
If the argument <code>expr</code> is an expression of
type <code>T</code>, or an untyped constant expression
whose <a href="#Constants">default type</a> is <code>T</code>,
then <code>new(expr)</code> allocates a variable of
type <code>T</code>, initializes it to the value
of <code>expr</code>, and returns its address, a value of
type <code>*T</code>.
</p>
<p>
If the argument is a type <code>T</code>, then <code>new(T)</code>
allocates a variable initialized to
the <a href="#The_zero_value">zero value</a> of type <code>T</code>.
</p>
<p>
For example, <code>new(123)</code> and <code>new(int)</code> each
return a pointer to a new variable of type <code>int</code>.

The value of the first variable is <code>123</code>, and the value
of the second is <code>0</code>.
</p>

<pre class="grammar">
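The spec text above condenses into a small, self-contained program. This is an illustrative sketch, not part of the commit; it requires a Go 1.26 toolchain, since `new(expr)` is gated on the language version:

```go
package main

import "fmt"

func main() {
	// new(T): a pointer to a zero-valued variable of type T.
	p := new(int)
	fmt.Println(*p) // 0

	// new(expr): a pointer to a variable initialized to expr.
	// 123 is an untyped constant with default type int, so q is *int.
	q := new(123)
	fmt.Println(*q) // 123

	// Any expression works; its type determines T.
	s := "hello"
	r := new(s + ", world") // r is *string
	fmt.Println(*r)         // hello, world
}
```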
@@ -1,3 +1,28 @@
## Changes to the language {#language}

<!-- https://go.dev/issue/45624 --->

The built-in `new` function, which creates a new variable, now allows
its operand to be an expression, specifying the initial value of the
variable.

This feature is particularly useful when working with serialization
packages such as `encoding/json` or protocol buffers that use a
pointer to represent an optional value, as it enables an optional
field to be populated in a simple expression, for example:

```go
import "encoding/json"

type Person struct {
	Name string `json:"name"`
	Age  *int   `json:"age"` // age if known; nil otherwise
}

func personJSON(name string, age int) ([]byte, error) {
	return json.Marshal(Person{
		Name: name,
		Age:  new(age),
	})
}
```
2 doc/next/6-stdlib/99-minor/image/jpeg/75603.md Normal file
@@ -0,0 +1,2 @@
The JPEG encoder and decoder have been replaced with new, faster, more accurate implementations.
Code that expects specific bit-for-bit outputs from the encoder or decoder may need to be updated.
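A practical consequence of this note: tests should compare decoded pixels, not encoded bytes. The sketch below is illustrative (the file name `photo.jpg` is hypothetical, not from the commit):

```go
package main

import (
	"bytes"
	"fmt"
	"image/jpeg"
	"log"
	"os"
)

func main() {
	data, err := os.ReadFile("photo.jpg") // hypothetical input file
	if err != nil {
		log.Fatal(err)
	}
	img, err := jpeg.Decode(bytes.NewReader(data))
	if err != nil {
		log.Fatal(err)
	}

	// Re-encode and decode again. The encoded bytes may differ across
	// Go releases (the DCT implementation changed), but the decoded
	// pixels remain a close approximation, so robust tests compare
	// pixel values within a tolerance rather than raw bytes.
	var buf bytes.Buffer
	if err := jpeg.Encode(&buf, img, &jpeg.Options{Quality: 90}); err != nil {
		log.Fatal(err)
	}
	img2, err := jpeg.Decode(&buf)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("bounds equal:", img.Bounds() == img2.Bounds())
}
```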
10 src/cmd/asm/internal/asm/testdata/riscv64.s vendored
@@ -549,6 +549,16 @@ start:
	VSOXEI64V	V3, V2, (X10)	// a771250e
	VSOXEI64V	V3, V2, V0, (X10)	// a771250c

	// 31.7.7: Unit-stride Fault-Only-First Loads
	VLE8FFV	(X10), V8	// 07040503
	VLE16FFV	(X10), V8	// 07540503
	VLE32FFV	(X10), V8	// 07640503
	VLE64FFV	(X10), V8	// 07740503
	VLE8FFV	(X10), V0, V8	// 07040501
	VLE16FFV	(X10), V0, V8	// 07540501
	VLE32FFV	(X10), V0, V8	// 07640501
	VLE64FFV	(X10), V0, V8	// 07740501

	// 31.7.8: Vector Load/Store Segment Instructions

	// 31.7.8.1: Vector Unit-Stride Segment Loads and Stores
@@ -73,6 +73,7 @@ TEXT errors(SB),$0
	//
	VSETIVLI	X10, E32, M2, TA, MA, X12	// ERROR "expected immediate value"
	VLE8V	(X10), V1, V3	// ERROR "invalid vector mask register"
	VLE8FFV	(X10), V1, V3	// ERROR "invalid vector mask register"
	VSE8V	V3, V1, (X10)	// ERROR "invalid vector mask register"
	VLSE8V	(X10), X10, V1, V3	// ERROR "invalid vector mask register"
	VSSE8V	V3, X11, V1, (X10)	// ERROR "invalid vector mask register"
@@ -20,6 +20,8 @@ TEXT validation(SB),$0
	VSETVL	X10, X11	// ERROR "expected integer register in rs1 position"
	VLE8V	(X10), X10	// ERROR "expected vector register in vd position"
	VLE8V	(V1), V3	// ERROR "expected integer register in rs1 position"
	VLE8FFV	(X10), X10	// ERROR "expected vector register in vd position"
	VLE8FFV	(V1), V3	// ERROR "expected integer register in rs1 position"
	VSE8V	X10, (X10)	// ERROR "expected vector register in vs1 position"
	VSE8V	V3, (V1)	// ERROR "expected integer register in rd position"
	VLSE8V	(X10), V3	// ERROR "expected integer register in rs2 position"
@@ -375,8 +375,111 @@ func TestExportedSymbols(t *testing.T) {
	}
}

func checkNumberOfExportedFunctionsWindows(t *testing.T, exportAllSymbols bool) {
	const prog = `
func checkNumberOfExportedFunctionsWindows(t *testing.T, prog string, exportedFunctions int, wantAll bool) {
	tmpdir := t.TempDir()

	srcfile := filepath.Join(tmpdir, "test.go")
	objfile := filepath.Join(tmpdir, "test.dll")
	if err := os.WriteFile(srcfile, []byte(prog), 0666); err != nil {
		t.Fatal(err)
	}
	argv := []string{"build", "-buildmode=c-shared"}
	if wantAll {
		argv = append(argv, "-ldflags", "-extldflags=-Wl,--export-all-symbols")
	}
	argv = append(argv, "-o", objfile, srcfile)
	out, err := exec.Command(testenv.GoToolPath(t), argv...).CombinedOutput()
	if err != nil {
		t.Fatalf("build failure: %s\n%s\n", err, string(out))
	}

	f, err := pe.Open(objfile)
	if err != nil {
		t.Fatalf("pe.Open failed: %v", err)
	}
	defer f.Close()

	_, pe64 := f.OptionalHeader.(*pe.OptionalHeader64)
	// grab the export data directory entry
	var idd pe.DataDirectory
	if pe64 {
		idd = f.OptionalHeader.(*pe.OptionalHeader64).DataDirectory[pe.IMAGE_DIRECTORY_ENTRY_EXPORT]
	} else {
		idd = f.OptionalHeader.(*pe.OptionalHeader32).DataDirectory[pe.IMAGE_DIRECTORY_ENTRY_EXPORT]
	}

	// figure out which section contains the import directory table
	var section *pe.Section
	for _, s := range f.Sections {
		if s.Offset == 0 {
			continue
		}
		if s.VirtualAddress <= idd.VirtualAddress && idd.VirtualAddress-s.VirtualAddress < s.VirtualSize {
			section = s
			break
		}
	}
	if section == nil {
		t.Fatal("no section contains export directory")
	}
	d, err := section.Data()
	if err != nil {
		t.Fatal(err)
	}
	// seek to the virtual address specified in the export data directory
	d = d[idd.VirtualAddress-section.VirtualAddress:]

	// TODO: deduplicate this struct from cmd/link/internal/ld/pe.go
	type IMAGE_EXPORT_DIRECTORY struct {
		_                 [2]uint32
		_                 [2]uint16
		_                 [2]uint32
		NumberOfFunctions uint32
		NumberOfNames     uint32
		_                 [3]uint32
	}
	var e IMAGE_EXPORT_DIRECTORY
	if err := binary.Read(bytes.NewReader(d), binary.LittleEndian, &e); err != nil {
		t.Fatalf("binary.Read failed: %v", err)
	}

	// Only the two exported functions and _cgo_dummy_export should be exported.
	// NumberOfNames is the number of functions exported with a unique name.
	// NumberOfFunctions can be higher than that because it also counts
	// functions exported only by ordinal, a unique number assigned by the linker,
	// and linkers might add an unknown number of their own ordinal-only functions.
	if wantAll {
		if e.NumberOfNames <= uint32(exportedFunctions) {
			t.Errorf("got %d exported names, want > %d", e.NumberOfNames, exportedFunctions)
		}
	} else {
		if e.NumberOfNames > uint32(exportedFunctions) {
			t.Errorf("got %d exported names, want <= %d", e.NumberOfNames, exportedFunctions)
		}
	}
}

func TestNumberOfExportedFunctions(t *testing.T) {
	if GOOS != "windows" {
		t.Skip("skipping windows only test")
	}
	globalSkip(t)
	testenv.MustHaveGoBuild(t)
	testenv.MustHaveCGO(t)
	testenv.MustHaveBuildMode(t, "c-shared")

	t.Parallel()

	const prog0 = `
package main

import "C"

func main() {
}
`

	const prog2 = `
package main

import "C"
@@ -394,84 +497,15 @@ func GoFunc2() {
func main() {
}
`

	tmpdir := t.TempDir()

	srcfile := filepath.Join(tmpdir, "test.go")
	objfile := filepath.Join(tmpdir, "test.dll")
	if err := os.WriteFile(srcfile, []byte(prog), 0666); err != nil {
		t.Fatal(err)
	}
	argv := []string{"build", "-buildmode=c-shared"}
	if exportAllSymbols {
		argv = append(argv, "-ldflags", "-extldflags=-Wl,--export-all-symbols")
	}
	argv = append(argv, "-o", objfile, srcfile)
	out, err := exec.Command(testenv.GoToolPath(t), argv...).CombinedOutput()
	if err != nil {
		t.Fatalf("build failure: %s\n%s\n", err, string(out))
	}

	f, err := pe.Open(objfile)
	if err != nil {
		t.Fatalf("pe.Open failed: %v", err)
	}
	defer f.Close()
	section := f.Section(".edata")
	if section == nil {
		t.Skip(".edata section is not present")
	}

	// TODO: deduplicate this struct from cmd/link/internal/ld/pe.go
	type IMAGE_EXPORT_DIRECTORY struct {
		_                 [2]uint32
		_                 [2]uint16
		_                 [2]uint32
		NumberOfFunctions uint32
		NumberOfNames     uint32
		_                 [3]uint32
	}
	var e IMAGE_EXPORT_DIRECTORY
	if err := binary.Read(section.Open(), binary.LittleEndian, &e); err != nil {
		t.Fatalf("binary.Read failed: %v", err)
	}

	// Only the two exported functions and _cgo_dummy_export should be exported
	expectedNumber := uint32(3)

	if exportAllSymbols {
		if e.NumberOfFunctions <= expectedNumber {
			t.Fatalf("missing exported functions: %v", e.NumberOfFunctions)
		}
		if e.NumberOfNames <= expectedNumber {
			t.Fatalf("missing exported names: %v", e.NumberOfNames)
		}
	} else {
		if e.NumberOfFunctions != expectedNumber {
			t.Fatalf("got %d exported functions; want %d", e.NumberOfFunctions, expectedNumber)
		}
		if e.NumberOfNames != expectedNumber {
			t.Fatalf("got %d exported names; want %d", e.NumberOfNames, expectedNumber)
		}
	}
}

func TestNumberOfExportedFunctions(t *testing.T) {
	if GOOS != "windows" {
		t.Skip("skipping windows only test")
	}
	globalSkip(t)
	testenv.MustHaveGoBuild(t)
	testenv.MustHaveCGO(t)
	testenv.MustHaveBuildMode(t, "c-shared")

	t.Parallel()

	t.Run("OnlyExported", func(t *testing.T) {
		checkNumberOfExportedFunctionsWindows(t, false)
	// All programs export _cgo_dummy_export, so add 1 to the expected counts.
	t.Run("OnlyExported/0", func(t *testing.T) {
		checkNumberOfExportedFunctionsWindows(t, prog0, 0+1, false)
	})
	t.Run("OnlyExported/2", func(t *testing.T) {
		checkNumberOfExportedFunctionsWindows(t, prog2, 2+1, false)
	})
	t.Run("All", func(t *testing.T) {
		checkNumberOfExportedFunctionsWindows(t, true)
		checkNumberOfExportedFunctionsWindows(t, prog2, 2+1, true)
	})
}
78 src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/main.go vendored Normal file
@@ -0,0 +1,78 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

/*
// Defined in tracebackctxt_c.c.
extern void C1(void);
extern void C2(void);
extern void tcContext(void*);
extern void tcTraceback(void*);
extern void tcSymbolizer(void*);
*/
import "C"

import (
	"fmt"
	"runtime"
	"sync"
	"unsafe"
)

// Regression test for https://go.dev/issue/73949. TSAN should not report races
// on writes to the argument passed to the symbolizer function.
//
// Triggering this race requires calls to the symbolizer function with the same
// argument pointer on multiple threads. The runtime passes a stack variable to
// this function, so that means we need to get a single goroutine to execute on
// two threads, calling the symbolizer function on each.
//
// runtime.CallersFrames / Next will call the symbolizer function (if there are
// C frames). So the approach here is, with GOMAXPROCS=2, have 2 goroutines
// that use CallersFrames over and over, both frequently calling Gosched in an
// attempt to get picked up by the other P.

var tracebackOK bool

func main() {
	runtime.GOMAXPROCS(2)
	runtime.SetCgoTraceback(0, unsafe.Pointer(C.tcTraceback), unsafe.Pointer(C.tcContext), unsafe.Pointer(C.tcSymbolizer))
	C.C1()
	if tracebackOK {
		fmt.Println("OK")
	}
}

//export G1
func G1() {
	C.C2()
}

//export G2
func G2() {
	pc := make([]uintptr, 32)
	n := runtime.Callers(0, pc)

	var wg sync.WaitGroup
	for range 2 {
		wg.Go(func() {
			for range 1000 {
				cf := runtime.CallersFrames(pc[:n])
				var frames []runtime.Frame
				for {
					frame, more := cf.Next()
					frames = append(frames, frame)
					if !more {
						break
					}
				}
				runtime.Gosched()
			}
		})
	}
	wg.Wait()

	tracebackOK = true
}
70 src/cmd/cgo/internal/testsanitizers/testdata/tsan_tracebackctxt/tracebackctxt_c.c vendored Normal file
@@ -0,0 +1,70 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The C definitions for tracebackctxt.go. That file uses //export so
// it can't put function definitions in the "C" import comment.

#include <stdint.h>
#include <stdio.h>

// Functions exported from Go.
extern void G1(void);
extern void G2(void);

void C1() {
	G1();
}

void C2() {
	G2();
}

struct cgoContextArg {
	uintptr_t context;
};

struct cgoTracebackArg {
	uintptr_t context;
	uintptr_t sigContext;
	uintptr_t* buf;
	uintptr_t max;
};

struct cgoSymbolizerArg {
	uintptr_t pc;
	const char* file;
	uintptr_t lineno;
	const char* func;
	uintptr_t entry;
	uintptr_t more;
	uintptr_t data;
};

void tcContext(void* parg) {
	struct cgoContextArg* arg = (struct cgoContextArg*)(parg);
	if (arg->context == 0) {
		arg->context = 1;
	}
}

void tcTraceback(void* parg) {
	int base, i;
	struct cgoTracebackArg* arg = (struct cgoTracebackArg*)(parg);
	if (arg->max < 1) {
		return;
	}
	arg->buf[0] = 6; // Chosen by fair dice roll.
}

void tcSymbolizer(void *parg) {
	struct cgoSymbolizerArg* arg = (struct cgoSymbolizerArg*)(parg);
	if (arg->pc == 0) {
		return;
	}
	// Report two lines per PC returned by traceback, to test more handling.
	arg->more = arg->file == NULL;
	arg->file = "tracebackctxt.go";
	arg->func = "cFunction";
	arg->lineno = arg->pc + (arg->more << 16);
}
@@ -56,6 +56,7 @@ func TestTSAN(t *testing.T) {
		{src: "tsan13.go", needsRuntime: true},
		{src: "tsan14.go", needsRuntime: true},
		{src: "tsan15.go", needsRuntime: true},
		{src: "tsan_tracebackctxt", needsRuntime: true}, // Subdirectory
	}
	for _, tc := range cases {
		tc := tc
@@ -67,7 +68,7 @@ func TestTSAN(t *testing.T) {
			defer dir.RemoveAll(t)

			outPath := dir.Join(name)
			mustRun(t, config.goCmd("build", "-o", outPath, srcPath(tc.src)))
			mustRun(t, config.goCmd("build", "-o", outPath, "./"+srcPath(tc.src)))

			cmdArgs := []string{outPath}
			if goos == "linux" {
@@ -215,7 +215,7 @@ const (
	ORSH    // X >> Y
	OAND    // X & Y
	OANDNOT // X &^ Y
	ONEW    // new(X); corresponds to calls to new in source code
	ONEW    // new(X); corresponds to calls to new(T) in source code
	ONOT    // !X
	OBITNOT // ^X
	OPLUS   // +X
@@ -42,6 +42,10 @@ func TypeNode(t *types.Type) Node {

// A DynamicType represents a type expression whose exact type must be
// computed dynamically.
//
// TODO(adonovan): I think "dynamic" is a misnomer here; it's really a
// type with free type parameters that needs to be instantiated to obtain
// a ground type for which an rtype can exist.
type DynamicType struct {
	miniExpr
@@ -87,7 +87,7 @@ constant for file bases and hence not encoded.
            [ Sync ]
            StringRef    // the (absolute) file name for the base
            Bool         // true if a file base, else a line base
            // The below is ommitted for file bases.
            // The below is omitted for file bases.
            [ Pos
              Uint64     // line
              Uint64 ]   // column
@@ -99,7 +99,7 @@ without a PosBase have no line or column.

Pos = [ Sync ]
      Bool             // true if the position has a base
      // The below is ommitted if the position has no base.
      // The below is omitted if the position has no base.
      [ Ref[PosBase]
        Uint64         // line
        Uint64 ]       // column
@@ -125,7 +125,7 @@ packages. The below package paths have special meaning.

Pkg = RefTable
      [ Sync ]
      StringRef        // path
      // The below is ommitted for the special package paths
      // The below is omitted for the special package paths
      // "builtin" and "unsafe".
      [ StringRef      // name
        Imports ]
@@ -49,9 +49,6 @@ type pkgReader struct {
	// but bitwise inverted so we can detect if we're missing the entry
	// or not.
	newindex []index

	// indicates whether the data is reading during reshaping.
	reshaping bool
}

func newPkgReader(pr pkgbits.PkgDecoder) *pkgReader {
@@ -119,10 +116,6 @@ type reader struct {
	// find parameters/results.
	funarghack bool

	// reshaping is used during reading exprReshape code, preventing
	// the reader from shapifying the re-shaped type.
	reshaping bool

	// methodSym is the name of method's name, if reading a method.
	// It's nil if reading a normal function or closure body.
	methodSym *types.Sym
@@ -937,8 +930,19 @@ func shapify(targ *types.Type, basic bool) *types.Type {
	// types, and discarding struct field names and tags. However, we'll
	// need to start tracking how type parameters are actually used to
	// implement some of these optimizations.
	pointerShaping := basic && targ.IsPtr() && !targ.Elem().NotInHeap()
	// The exception is when the type parameter is a pointer to a type
	// which `Type.HasShape()` returns true, but `Type.IsShape()` returns
	// false, like `*[]go.shape.T`. This is because the type parameter is
	// used to instantiate a generic function inside another generic function.
	// In this case, we want to keep the targ as-is, otherwise, we may lose the
	// original type after `*[]go.shape.T` is shapified to `*go.shape.uint8`.
	// See issue #54535, #71184.
	if pointerShaping && !targ.Elem().IsShape() && targ.Elem().HasShape() {
		return targ
	}
	under := targ.Underlying()
	if basic && targ.IsPtr() && !targ.Elem().NotInHeap() {
	if pointerShaping {
		under = types.NewPtr(types.Types[types.TUINT8])
	}
@@ -1014,25 +1018,7 @@ func (pr *pkgReader) objDictIdx(sym *types.Sym, idx index, implicits, explicits
	// arguments.
	for i, targ := range dict.targs {
		basic := r.Bool()
		isPointerShape := basic && targ.IsPtr() && !targ.Elem().NotInHeap()
		// We should not do shapify during the reshaping process, see #71184.
		// However, this only matters for shapify a pointer type, which will
		// lose the original underlying type.
		//
		// Example with a pointer type:
		//
		//   - First, shapifying *[]T -> *uint8
		//   - During the reshaping process, *uint8 is shapified to *go.shape.uint8
		//   - This ends up with a different type with the original *[]T
		//
		// For a non-pointer type:
		//
		//   - int -> go.shape.int
		//   - go.shape.int -> go.shape.int
		//
		// We always end up with the identical type.
		canShapify := !pr.reshaping || !isPointerShape
		if dict.shaped && canShapify {
		if dict.shaped {
			dict.targs[i] = shapify(targ, basic)
		}
	}
@@ -2445,8 +2431,16 @@ func (r *reader) expr() (res ir.Node) {

	case exprNew:
		pos := r.pos()
		typ := r.exprType()
		return typecheck.Expr(ir.NewUnaryExpr(pos, ir.ONEW, typ))
		if r.Bool() {
			// new(expr) -> tmp := expr; &tmp
			x := r.expr()
			var init ir.Nodes
			addr := ir.NewAddrExpr(pos, r.tempCopy(pos, x, &init))
			addr.SetInit(init)
			return typecheck.Expr(addr)
		}
		// new(T)
		return typecheck.Expr(ir.NewUnaryExpr(pos, ir.ONEW, r.exprType()))

	case exprSizeof:
		return ir.NewUintptr(r.pos(), r.typ().Size())
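The `tmp := expr; &tmp` lowering can be pictured at the source level. This is an illustrative equivalence (assuming a Go 1.26 toolchain), not code from the commit:

```go
package main

import "fmt"

func main() {
	age := 42

	// What the reader emits for new(expr)...
	p := new(age)

	// ...behaves like this manual desugaring:
	tmp := age
	q := &tmp

	fmt.Println(*p == *q) // true; p and q point at distinct copies of age
}
```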
@@ -2470,10 +2464,7 @@ func (r *reader) expr() (res ir.Node) {

	case exprReshape:
		typ := r.typ()
		old := r.reshaping
		r.reshaping = true
		x := r.expr()
		r.reshaping = old

		if types.IdenticalStrict(x.Type(), typ) {
			return x
@@ -2596,10 +2587,7 @@ func (r *reader) funcInst(pos src.XPos) (wrapperFn, baseFn, dictPtr ir.Node) {
		info := r.dict.subdicts[idx]
		explicits := r.p.typListIdx(info.explicits, r.dict)

		old := r.p.reshaping
		r.p.reshaping = r.reshaping
		baseFn = r.p.objIdx(info.idx, implicits, explicits, true).(*ir.Name)
		r.p.reshaping = old

		// TODO(mdempsky): Is there a more robust way to get the
		// dictionary pointer type here?
@@ -3259,6 +3247,7 @@ func (r *reader) exprType() ir.Node {
	var rtype, itab ir.Node

	if r.Bool() {
		// non-empty interface
		typ, rtype, _, _, itab = r.itab(pos)
		if !typ.IsInterface() {
			rtype = nil // TODO(mdempsky): Leave set?
@@ -2035,10 +2035,16 @@ func (w *writer) expr(expr syntax.Expr) {
		case "new":
			assert(len(expr.ArgList) == 1)
			assert(!expr.HasDots)
			arg := expr.ArgList[0]

			w.Code(exprNew)
			w.pos(expr)
			w.exprType(nil, expr.ArgList[0])
			tv := w.p.typeAndValue(arg)
			if w.Bool(!tv.IsType()) {
				w.expr(arg) // new(expr), go1.26
			} else {
				w.exprType(nil, arg) // new(T)
			}
			return

		case "Sizeof":
@@ -123,18 +123,21 @@ func tighten(f *Func) {

	// If the target location is inside a loop,
	// move the target location up to just before the loop head.
	for _, b := range f.Blocks {
		origloop := loops.b2l[b.ID]
		for _, v := range b.Values {
			t := target[v.ID]
			if t == nil {
				continue
			}
			targetloop := loops.b2l[t.ID]
			for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
				t = idom[targetloop.header.ID]
				target[v.ID] = t
				targetloop = loops.b2l[t.ID]
	if !loops.hasIrreducible {
		// Loop info might not be correct for irreducible loops. See issue 75569.
		for _, b := range f.Blocks {
			origloop := loops.b2l[b.ID]
			for _, v := range b.Values {
				t := target[v.ID]
				if t == nil {
					continue
				}
				targetloop := loops.b2l[t.ID]
				for targetloop != nil && (origloop == nil || targetloop.depth > origloop.depth) {
					t = idom[targetloop.header.ID]
					target[v.ID] = t
					targetloop = loops.b2l[t.ID]
				}
			}
		}
	}
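For context, an irreducible loop is one with more than one entry point, so no single header block dominates the loop body, and the SSA loop analysis the code above consults cannot describe it reliably. A minimal sketch of how `goto` can create one (illustrative, not from the commit):

```go
package main

import "fmt"

// irreducible builds a loop with two entry points: the normal entry
// through the "loop" label, and a goto that jumps straight into the
// middle of the loop body.
func irreducible(n int) int {
	i := 0
	if n%2 == 0 {
		goto mid // second entry into the loop body
	}
loop:
	i++
mid:
	if i < n {
		goto loop
	}
	return i
}

func main() {
	fmt.Println(irreducible(5), irreducible(6))
}
```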
@@ -98,17 +98,17 @@ func (check *Checker) builtin(x *operand, call *syntax.CallExpr, id builtinId) (
		if ok, _ := x.assignableTo(check, NewSlice(universeByte), nil); ok {
			y := args[1]
			hasString := false
			typeset(y.typ, func(_, u Type) bool {
			for _, u := range typeset(y.typ) {
				if s, _ := u.(*Slice); s != nil && Identical(s.elem, universeByte) {
					return true
				}
				if isString(u) {
					// typeset ⊇ {[]byte}
				} else if isString(u) {
					// typeset ⊇ {string}
					hasString = true
					return true
				} else {
					y = nil
					break
				}
				y = nil
				return false
			})
			}
			if y != nil && hasString {
				// setting the signature also signals that we're done
				sig = makeSig(x.typ, x.typ, y.typ)
@@ -368,16 +368,16 @@ func (check *Checker) builtin(x *operand, call *syntax.CallExpr, id builtinId) (
		var special bool
		if ok, _ := x.assignableTo(check, NewSlice(universeByte), nil); ok {
			special = true
			typeset(y.typ, func(_, u Type) bool {
			for _, u := range typeset(y.typ) {
				if s, _ := u.(*Slice); s != nil && Identical(s.elem, universeByte) {
					return true
					// typeset ⊇ {[]byte}
				} else if isString(u) {
					// typeset ⊇ {string}
				} else {
					special = false
					break
				}
				if isString(u) {
					return true
				}
				special = false
				return false
			})
			}
		}

		// general case
@@ -636,11 +636,30 @@ func (check *Checker) builtin(x *operand, call *syntax.CallExpr, id builtinId) (
	}

	case _New:
		// new(T)
		// new(T) or new(expr)
		// (no argument evaluated yet)
		T := check.varType(argList[0])
		if !isValid(T) {
			return
		arg := argList[0]
		check.exprOrType(x, arg, true)
		var T Type
		switch x.mode {
		case builtin:
			check.errorf(x, UncalledBuiltin, "%s must be called", x)
			x.mode = invalid
		case typexpr:
			// new(T)
			T = x.typ
			if !isValid(T) {
				return
			}
		default:
			// new(expr)
			check.verifyVersionf(call.Fun, go1_26, "new(expr)")
			T = Default(x.typ)
			if T != x.typ {
				// untyped constant: check for overflow.
				check.assignment(x, T, "argument to new")
			}
			check.validVarType(arg, T)
		}

		x.mode = value
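A few illustrative cases of the checking logic above; these snippets are hypothetical examples (assuming a Go 1.26 toolchain), not from the commit:

```go
package main

import "fmt"

func main() {
	p := new(1 << 8) // untyped constant; default type int, so p is *int
	fmt.Println(*p)  // 256

	q := new(int8(5)) // typed expression; q is *int8
	fmt.Println(*q)   // 5

	// Rejected by the checker, per the rules above:
	//   new(1 << 100)  // the constant overflows int, its default type
	//   new(len)       // error: len must be called (a builtin is not a value)
}
```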
@@ -961,29 +980,22 @@ func (check *Checker) builtin(x *operand, call *syntax.CallExpr, id builtinId) (
// or a type error if x is not a slice (or a type set of slices).
func sliceElem(x *operand) (Type, *typeError) {
	var E Type
	var err *typeError
	typeset(x.typ, func(_, u Type) bool {
	for _, u := range typeset(x.typ) {
		s, _ := u.(*Slice)
		if s == nil {
			if x.isNil() {
				// Printing x in this case would just print "nil".
				// Special case this so we can emphasize "untyped".
				err = typeErrorf("argument must be a slice; have untyped nil")
				return nil, typeErrorf("argument must be a slice; have untyped nil")
			} else {
				err = typeErrorf("argument must be a slice; have %s", x)
				return nil, typeErrorf("argument must be a slice; have %s", x)
			}
			return false
		}
		if E == nil {
			E = s.elem
		} else if !Identical(E, s.elem) {
			err = typeErrorf("mismatched slice element types %s and %s in %s", E, s.elem, x)
			return false
			return nil, typeErrorf("mismatched slice element types %s and %s in %s", E, s.elem, x)
		}
		return true
	})
	if err != nil {
		return nil, err
	}
	return E, nil
}
@@ -216,11 +216,11 @@ func (check *Checker) sliceExpr(x *operand, e *syntax.SliceExpr) {
	// determine common underlying type cu
	var ct, cu Type // type and respective common underlying type
	var hasString bool
	typeset(x.typ, func(t, u Type) bool {
	for t, u := range typeset(x.typ) {
		if u == nil {
			check.errorf(x, NonSliceableOperand, "cannot slice %s: no specific type in %s", x, x.typ)
			cu = nil
			return false
			break
		}

		// Treat strings like byte slices but remember that we saw a string.
@@ -232,18 +232,16 @@ func (check *Checker) sliceExpr(x *operand, e *syntax.SliceExpr) {
		// If this is the first type we're seeing, we're done.
		if cu == nil {
			ct, cu = t, u
			return true
			continue
		}

		// Otherwise, the current type must have the same underlying type as all previous types.
		if !Identical(cu, u) {
			check.errorf(x, NonSliceableOperand, "cannot slice %s: %s and %s have different underlying types", x, ct, t)
			cu = nil
			return false
			break
		}

		return true
	})
	}
	if hasString {
		// If we saw a string, proceed with string type,
		// but don't go from untyped string to string.
@@ -49,7 +49,7 @@ func NewSignatureType(recv *Var, recvTypeParams, typeParams []*TypeParam, params
	}
	last := params.At(n - 1).typ
	var S *Slice
	typeset(last, func(t, _ Type) bool {
	for t := range typeset(last) {
		var s *Slice
		if isString(t) {
			s = NewSlice(universeByte)
@@ -60,10 +60,9 @@ func NewSignatureType(recv *Var, recvTypeParams, typeParams []*TypeParam, params
			S = s
		} else if !Identical(S, s) {
			S = nil
			return false
			break
		}
		return true
	})
	}
	if S == nil {
		panic(fmt.Sprintf("got %s, want variadic parameter of unnamed slice or string type", last))
	}
@@ -360,6 +360,7 @@ func TestStdKen(t *testing.T) {
var excluded = map[string]bool{
	"builtin":                       true,
	"cmd/compile/internal/ssa/_gen": true,
	"runtime/_mkmalloc":             true,
	"simd/_gen/simdgen":             true,
	"simd/_gen/unify":               true,
}
@@ -155,10 +155,10 @@ func (t *TypeParam) is(f func(*term) bool) bool {
	return t.iface().typeSet().is(f)
}

// typeset is an iterator over the (type/underlying type) pairs of the
// typeset reports whether f(t, u) is true for all (type/underlying type) pairs of the
// specific type terms of t's constraint.
// If there are no specific terms, typeset calls yield with (nil, nil).
// In any case, typeset is guaranteed to call yield at least once.
func (t *TypeParam) typeset(yield func(t, u Type) bool) {
	t.iface().typeSet().typeset(yield)
// If there are no specific terms, typeset returns f(nil, nil).
// In any case, typeset is guaranteed to call f at least once.
func (t *TypeParam) typeset(f func(t, u Type) bool) bool {
	return t.iface().typeSet().all(f)
}
@@ -104,13 +104,12 @@ func (s *_TypeSet) hasTerms() bool { return !s.terms.isEmpty() && !s.terms.isAll
// subsetOf reports whether s1 ⊆ s2.
func (s1 *_TypeSet) subsetOf(s2 *_TypeSet) bool { return s1.terms.subsetOf(s2.terms) }

// typeset is an iterator over the (type/underlying type) pairs in s.
// If s has no specific terms, typeset calls yield with (nil, nil).
// In any case, typeset is guaranteed to call yield at least once.
func (s *_TypeSet) typeset(yield func(t, u Type) bool) {
// all reports whether f(t, u) is true for each (type/underlying type) pair in s.
// If s has no specific terms, all calls f(nil, nil).
// In any case, all is guaranteed to call f at least once.
func (s *_TypeSet) all(f func(t, u Type) bool) bool {
	if !s.hasTerms() {
		yield(nil, nil)
		return
		return f(nil, nil)
	}

	for _, t := range s.terms {
@@ -123,10 +122,11 @@ func (s *_TypeSet) typeset(yield func(t, u Type) bool) {
		if debug {
			assert(Identical(u, under(u)))
		}
		if !yield(t.typ, u) {
			break
		if !f(t.typ, u) {
			return false
		}
	}
	return true
}

// is calls f with the specific type terms of s and reports whether
@@ -4,6 +4,8 @@

package types2

import "iter"

// under returns the true expanded underlying type.
// If it doesn't exist, the result is Typ[Invalid].
// under must only be called when a type is known
@@ -18,12 +20,18 @@ func under(t Type) Type {
// If typ is a type parameter, underIs returns the result of typ.underIs(f).
// Otherwise, underIs returns the result of f(under(typ)).
func underIs(typ Type, f func(Type) bool) bool {
	var ok bool
	typeset(typ, func(_, u Type) bool {
		ok = f(u)
		return ok
	return all(typ, func(_, u Type) bool {
		return f(u)
	})
	return ok
}

// all reports whether f(t, u) is true for all (type/underlying type)
// pairs in the typeset of t. See [typeset] for details of sequence.
func all(t Type, f func(t, u Type) bool) bool {
	if p, _ := Unalias(t).(*TypeParam); p != nil {
		return p.typeset(f)
	}
	return f(t, under(t))
}

// typeset is an iterator over the (type/underlying type) pairs of the
@@ -32,12 +40,10 @@ func underIs(typ Type, f func(Type) bool) bool {
// In that case, if there are no specific terms, typeset calls yield with (nil, nil).
// If t is not a type parameter, the implied type set consists of just t.
// In any case, typeset is guaranteed to call yield at least once.
func typeset(t Type, yield func(t, u Type) bool) {
	if p, _ := Unalias(t).(*TypeParam); p != nil {
		p.typeset(yield)
		return
func typeset(t Type) iter.Seq2[Type, Type] {
	return func(yield func(t, u Type) bool) {
		_ = all(t, yield)
	}
	yield(t, under(t))
}

// A typeError describes a type error.
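The shape of this refactor, a callback-based `all` wrapped into an `iter.Seq2` so callers can use `range`, can be shown with ordinary data. The pair values below are hypothetical stand-ins for the internal type-set terms, not the real types2 API:

```go
package main

import (
	"fmt"
	"iter"
)

// Callback style: report whether f returns true for every pair.
func all(pairs [][2]string, f func(t, u string) bool) bool {
	for _, p := range pairs {
		if !f(p[0], p[1]) {
			return false
		}
	}
	return true
}

// Iterator style: the same sequence as an iter.Seq2, usable with range.
// yield's bool return doubles as the callback's "keep going" signal.
func typeset(pairs [][2]string) iter.Seq2[string, string] {
	return func(yield func(t, u string) bool) {
		_ = all(pairs, yield)
	}
}

func main() {
	ps := [][2]string{{"MyInt", "int"}, {"MyStr", "string"}}
	for t, u := range typeset(ps) {
		fmt.Println(t, u)
	}
}
```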
@@ -80,35 +86,28 @@ func (err *typeError) format(check *Checker) string {
// with the single type t in its type set.
func commonUnder(t Type, cond func(t, u Type) *typeError) (Type, *typeError) {
	var ct, cu Type // type and respective common underlying type
	var err *typeError

	bad := func(format string, args ...any) bool {
		err = typeErrorf(format, args...)
		return false
	}

	typeset(t, func(t, u Type) bool {
	for t, u := range typeset(t) {
		if cond != nil {
			if err = cond(t, u); err != nil {
				return false
			if err := cond(t, u); err != nil {
				return nil, err
			}
		}

		if u == nil {
			return bad("no specific type")
			return nil, typeErrorf("no specific type")
		}

		// If this is the first type we're seeing, we're done.
		if cu == nil {
			ct, cu = t, u
			return true
			continue
		}

		// If we've seen a channel before, and we have a channel now, they must be compatible.
		if chu, _ := cu.(*Chan); chu != nil {
			if ch, _ := u.(*Chan); ch != nil {
				if !Identical(chu.elem, ch.elem) {
					return bad("channels %s and %s have different element types", ct, t)
					return nil, typeErrorf("channels %s and %s have different element types", ct, t)
				}
				// If we have different channel directions, keep the restricted one
				// and complain if they conflict.
@@ -118,22 +117,16 @@ func commonUnder(t Type, cond func(t, u Type) *typeError) (Type, *typeError) {
				case chu.dir == SendRecv:
					ct, cu = t, u // switch to restricted channel
				case ch.dir != SendRecv:
					return bad("channels %s and %s have conflicting directions", ct, t)
					return nil, typeErrorf("channels %s and %s have conflicting directions", ct, t)
				}
				return true
				continue
			}
		}

		// Otherwise, the current type must have the same underlying type as all previous types.
		if !Identical(cu, u) {
			return bad("%s and %s have different underlying types", ct, t)
			return nil, typeErrorf("%s and %s have different underlying types", ct, t)
		}

		return true
	})

	if err != nil {
		return nil, err
	}
	return cu, nil
}
@@ -43,6 +43,7 @@ var (
	go1_21 = asGoVersion("go1.21")
	go1_22 = asGoVersion("go1.22")
	go1_23 = asGoVersion("go1.23")
	go1_26 = asGoVersion("go1.26")

	// current (deployed) Go version
	go_current = asGoVersion(fmt.Sprintf("go1.%d", goversion.Version))
78 src/cmd/compile/testdata/script/issue75461.txt vendored Normal file
@@ -0,0 +1,78 @@
go build main.go
! stdout .
! stderr .

-- main.go --
package main

import (
	"demo/registry"
)

func main() {
	_ = registry.NewUserRegistry()
}

-- go.mod --
module demo

go 1.24

-- model/user.go --
package model

type User struct {
	ID int
}

func (c *User) String() string {
	return ""
}

-- ordered/map.go --
package ordered

type OrderedMap[K comparable, V any] struct {
	m map[K]V
}

func New[K comparable, V any](options ...any) *OrderedMap[K, V] {
	orderedMap := &OrderedMap[K, V]{}
	return orderedMap
}

-- registry/user.go --
package registry

import (
	"demo/model"
	"demo/ordered"
)

type baseRegistry = Registry[model.User, *model.User]

type UserRegistry struct {
	*baseRegistry
}

type Registry[T any, P PStringer[T]] struct {
	m *ordered.OrderedMap[string, P]
}

type PStringer[T any] interface {
	*T
	String() string
}

func NewRegistry[T any, P PStringer[T]]() *Registry[T, P] {
	r := &Registry[T, P]{
		m: ordered.New[string, P](),
	}
	return r
}

func NewUserRegistry() *UserRegistry {
	return &UserRegistry{
		baseRegistry: NewRegistry[model.User](),
	}
}
@@ -1,52 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"go/ast"
	"go/version"
	"strings"
)

func init() {
	register(buildtagFix)
}

const buildtagGoVersionCutoff = "go1.18"

var buildtagFix = fix{
	name: "buildtag",
	date: "2021-08-25",
	f:    buildtag,
	desc: `Remove +build comments from modules using Go 1.18 or later`,
}

func buildtag(f *ast.File) bool {
	if version.Compare(*goVersion, buildtagGoVersionCutoff) < 0 {
		return false
	}

	// File is already gofmt-ed, so we know that if there are +build lines,
	// they are in a comment group that starts with a //go:build line followed
	// by a blank line. While we cannot delete comments from an AST and
	// expect consistent output in general, this specific case - deleting only
	// some lines from a comment block - does format correctly.
	fixed := false
	for _, g := range f.Comments {
		sawGoBuild := false
		for i, c := range g.List {
			if strings.HasPrefix(c.Text, "//go:build ") {
				sawGoBuild = true
			}
			if sawGoBuild && strings.HasPrefix(c.Text, "// +build ") {
				g.List = g.List[:i]
				fixed = true
				break
			}
		}
	}

	return fixed
}
@@ -1,34 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	addTestCases(buildtagTests, buildtag)
}

var buildtagTests = []testCase{
	{
		Name:    "buildtag.oldGo",
		Version: "go1.10",
		In: `//go:build yes
// +build yes

package main
`,
	},
	{
		Name:    "buildtag.new",
		Version: "go1.99",
		In: `//go:build yes
// +build yes

package main
`,
		Out: `//go:build yes

package main
`,
	},
}
@@ -1,25 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"go/ast"
)

func init() {
	register(cftypeFix)
}

var cftypeFix = fix{
	name:     "cftype",
	date:     "2017-09-27",
	f:        noop,
	desc:     `Fixes initializers and casts of C.*Ref and JNI types (removed)`,
	disabled: false,
}

func noop(f *ast.File) bool {
	return false
}
@@ -1,17 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(contextFix)
}

var contextFix = fix{
	name:     "context",
	date:     "2016-09-09",
	f:        noop,
	desc:     `Change imports of golang.org/x/net/context to context (removed)`,
	disabled: false,
}
@@ -9,29 +9,12 @@ the necessary changes to your programs.

Usage:

	go tool fix [-r name,...] [path ...]
	go tool fix [ignored...]

Without an explicit path, fix reads standard input and writes the
result to standard output.

If the named path is a file, fix rewrites the named files in place.
If the named path is a directory, fix rewrites all .go files in that
directory tree. When fix rewrites a file, it prints a line to standard
error giving the name of the file and the rewrite applied.

If the -diff flag is set, no files are rewritten. Instead fix prints
the differences a rewrite would introduce.

The -r flag restricts the set of rewrites considered to those in the
named list. By default fix considers all known rewrites. Fix's
rewrites are idempotent, so that it is safe to apply fix to updated
or partially updated code even without using the -r flag.

Fix prints the full list of fixes it can apply in its help output;
to see them, run go tool fix -help.

Fix does not make backup copies of the files that it edits.
Instead, use a version control system's “diff” functionality to inspect
the changes that fix makes before committing them.
This tool is currently in transition. All its historical fixers were
long obsolete and have been removed, so it is currently a no-op. In
due course the tool will integrate with the Go analysis framework
(golang.org/x/tools/go/analysis) and run a modern suite of fix
algorithms; see https://go.dev/issue/71859.
*/
package main
@@ -1,26 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(eglFixDisplay)
	register(eglFixConfig)
}

var eglFixDisplay = fix{
	name:     "egl",
	date:     "2018-12-15",
	f:        noop,
	desc:     `Fixes initializers of EGLDisplay (removed)`,
	disabled: false,
}

var eglFixConfig = fix{
	name:     "eglconf",
	date:     "2020-05-30",
	f:        noop,
	desc:     `Fixes initializers of EGLConfig (removed)`,
	disabled: false,
}
@@ -1,552 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"go/ast"
	"go/token"
	"path"
	"strconv"
)

type fix struct {
	name     string
	date     string // date that fix was introduced, in YYYY-MM-DD format
	f        func(*ast.File) bool
	desc     string
	disabled bool // whether this fix should be disabled by default
}

var fixes []fix

func register(f fix) {
	fixes = append(fixes, f)
}

// walk traverses the AST x, calling visit(y) for each node y in the tree but
// also with a pointer to each ast.Expr, ast.Stmt, and *ast.BlockStmt,
// in a bottom-up traversal.
func walk(x any, visit func(any)) {
	walkBeforeAfter(x, nop, visit)
}

func nop(any) {}

// walkBeforeAfter is like walk but calls before(x) before traversing
// x's children and after(x) afterward.
func walkBeforeAfter(x any, before, after func(any)) {
	before(x)

	switch n := x.(type) {
	default:
		panic(fmt.Errorf("unexpected type %T in walkBeforeAfter", x))

	case nil:

	// pointers to interfaces
	case *ast.Decl:
		walkBeforeAfter(*n, before, after)
	case *ast.Expr:
		walkBeforeAfter(*n, before, after)
	case *ast.Spec:
		walkBeforeAfter(*n, before, after)
	case *ast.Stmt:
		walkBeforeAfter(*n, before, after)

	// pointers to struct pointers
	case **ast.BlockStmt:
		walkBeforeAfter(*n, before, after)
	case **ast.CallExpr:
		walkBeforeAfter(*n, before, after)
	case **ast.FieldList:
		walkBeforeAfter(*n, before, after)
	case **ast.FuncType:
		walkBeforeAfter(*n, before, after)
	case **ast.Ident:
		walkBeforeAfter(*n, before, after)
	case **ast.BasicLit:
		walkBeforeAfter(*n, before, after)

	// pointers to slices
	case *[]ast.Decl:
		walkBeforeAfter(*n, before, after)
	case *[]ast.Expr:
		walkBeforeAfter(*n, before, after)
	case *[]*ast.File:
		walkBeforeAfter(*n, before, after)
	case *[]*ast.Ident:
		walkBeforeAfter(*n, before, after)
	case *[]ast.Spec:
		walkBeforeAfter(*n, before, after)
	case *[]ast.Stmt:
		walkBeforeAfter(*n, before, after)

	// These are ordered and grouped to match ../../go/ast/ast.go
	case *ast.Field:
		walkBeforeAfter(&n.Names, before, after)
		walkBeforeAfter(&n.Type, before, after)
		walkBeforeAfter(&n.Tag, before, after)
	case *ast.FieldList:
		for _, field := range n.List {
			walkBeforeAfter(field, before, after)
		}
	case *ast.BadExpr:
	case *ast.Ident:
	case *ast.Ellipsis:
		walkBeforeAfter(&n.Elt, before, after)
	case *ast.BasicLit:
	case *ast.FuncLit:
		walkBeforeAfter(&n.Type, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.CompositeLit:
		walkBeforeAfter(&n.Type, before, after)
		walkBeforeAfter(&n.Elts, before, after)
	case *ast.ParenExpr:
		walkBeforeAfter(&n.X, before, after)
	case *ast.SelectorExpr:
		walkBeforeAfter(&n.X, before, after)
	case *ast.IndexExpr:
		walkBeforeAfter(&n.X, before, after)
		walkBeforeAfter(&n.Index, before, after)
	case *ast.IndexListExpr:
		walkBeforeAfter(&n.X, before, after)
		walkBeforeAfter(&n.Indices, before, after)
	case *ast.SliceExpr:
		walkBeforeAfter(&n.X, before, after)
		if n.Low != nil {
			walkBeforeAfter(&n.Low, before, after)
		}
		if n.High != nil {
			walkBeforeAfter(&n.High, before, after)
		}
	case *ast.TypeAssertExpr:
		walkBeforeAfter(&n.X, before, after)
		walkBeforeAfter(&n.Type, before, after)
	case *ast.CallExpr:
		walkBeforeAfter(&n.Fun, before, after)
		walkBeforeAfter(&n.Args, before, after)
	case *ast.StarExpr:
		walkBeforeAfter(&n.X, before, after)
	case *ast.UnaryExpr:
		walkBeforeAfter(&n.X, before, after)
	case *ast.BinaryExpr:
		walkBeforeAfter(&n.X, before, after)
		walkBeforeAfter(&n.Y, before, after)
	case *ast.KeyValueExpr:
		walkBeforeAfter(&n.Key, before, after)
		walkBeforeAfter(&n.Value, before, after)

	case *ast.ArrayType:
		walkBeforeAfter(&n.Len, before, after)
		walkBeforeAfter(&n.Elt, before, after)
	case *ast.StructType:
		walkBeforeAfter(&n.Fields, before, after)
	case *ast.FuncType:
		if n.TypeParams != nil {
			walkBeforeAfter(&n.TypeParams, before, after)
		}
		walkBeforeAfter(&n.Params, before, after)
		if n.Results != nil {
			walkBeforeAfter(&n.Results, before, after)
		}
	case *ast.InterfaceType:
		walkBeforeAfter(&n.Methods, before, after)
	case *ast.MapType:
		walkBeforeAfter(&n.Key, before, after)
		walkBeforeAfter(&n.Value, before, after)
	case *ast.ChanType:
		walkBeforeAfter(&n.Value, before, after)

	case *ast.BadStmt:
	case *ast.DeclStmt:
		walkBeforeAfter(&n.Decl, before, after)
	case *ast.EmptyStmt:
	case *ast.LabeledStmt:
		walkBeforeAfter(&n.Stmt, before, after)
	case *ast.ExprStmt:
		walkBeforeAfter(&n.X, before, after)
	case *ast.SendStmt:
		walkBeforeAfter(&n.Chan, before, after)
		walkBeforeAfter(&n.Value, before, after)
	case *ast.IncDecStmt:
		walkBeforeAfter(&n.X, before, after)
	case *ast.AssignStmt:
		walkBeforeAfter(&n.Lhs, before, after)
		walkBeforeAfter(&n.Rhs, before, after)
	case *ast.GoStmt:
		walkBeforeAfter(&n.Call, before, after)
	case *ast.DeferStmt:
		walkBeforeAfter(&n.Call, before, after)
	case *ast.ReturnStmt:
		walkBeforeAfter(&n.Results, before, after)
	case *ast.BranchStmt:
	case *ast.BlockStmt:
		walkBeforeAfter(&n.List, before, after)
	case *ast.IfStmt:
		walkBeforeAfter(&n.Init, before, after)
		walkBeforeAfter(&n.Cond, before, after)
		walkBeforeAfter(&n.Body, before, after)
		walkBeforeAfter(&n.Else, before, after)
	case *ast.CaseClause:
		walkBeforeAfter(&n.List, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.SwitchStmt:
		walkBeforeAfter(&n.Init, before, after)
		walkBeforeAfter(&n.Tag, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.TypeSwitchStmt:
		walkBeforeAfter(&n.Init, before, after)
		walkBeforeAfter(&n.Assign, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.CommClause:
		walkBeforeAfter(&n.Comm, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.SelectStmt:
		walkBeforeAfter(&n.Body, before, after)
	case *ast.ForStmt:
		walkBeforeAfter(&n.Init, before, after)
		walkBeforeAfter(&n.Cond, before, after)
		walkBeforeAfter(&n.Post, before, after)
		walkBeforeAfter(&n.Body, before, after)
	case *ast.RangeStmt:
		walkBeforeAfter(&n.Key, before, after)
		walkBeforeAfter(&n.Value, before, after)
		walkBeforeAfter(&n.X, before, after)
		walkBeforeAfter(&n.Body, before, after)

	case *ast.ImportSpec:
	case *ast.ValueSpec:
		walkBeforeAfter(&n.Type, before, after)
		walkBeforeAfter(&n.Values, before, after)
		walkBeforeAfter(&n.Names, before, after)
	case *ast.TypeSpec:
		if n.TypeParams != nil {
			walkBeforeAfter(&n.TypeParams, before, after)
		}
		walkBeforeAfter(&n.Type, before, after)

	case *ast.BadDecl:
	case *ast.GenDecl:
		walkBeforeAfter(&n.Specs, before, after)
	case *ast.FuncDecl:
		if n.Recv != nil {
			walkBeforeAfter(&n.Recv, before, after)
		}
		walkBeforeAfter(&n.Type, before, after)
		if n.Body != nil {
			walkBeforeAfter(&n.Body, before, after)
		}

	case *ast.File:
		walkBeforeAfter(&n.Decls, before, after)

	case *ast.Package:
		walkBeforeAfter(&n.Files, before, after)

	case []*ast.File:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	case []ast.Decl:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	case []ast.Expr:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	case []*ast.Ident:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	case []ast.Stmt:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	case []ast.Spec:
		for i := range n {
			walkBeforeAfter(&n[i], before, after)
		}
	}
	after(x)
}

// imports reports whether f imports path.
func imports(f *ast.File, path string) bool {
	return importSpec(f, path) != nil
}

// importSpec returns the import spec if f imports path,
// or nil otherwise.
func importSpec(f *ast.File, path string) *ast.ImportSpec {
	for _, s := range f.Imports {
		if importPath(s) == path {
			return s
		}
	}
	return nil
}

// importPath returns the unquoted import path of s,
// or "" if the path is not properly quoted.
func importPath(s *ast.ImportSpec) string {
	t, err := strconv.Unquote(s.Path.Value)
	if err == nil {
		return t
	}
	return ""
}

// declImports reports whether gen contains an import of path.
func declImports(gen *ast.GenDecl, path string) bool {
	if gen.Tok != token.IMPORT {
		return false
	}
	for _, spec := range gen.Specs {
		impspec := spec.(*ast.ImportSpec)
		if importPath(impspec) == path {
			return true
		}
	}
	return false
}

// isTopName reports whether n is a top-level unresolved identifier with the given name.
func isTopName(n ast.Expr, name string) bool {
	id, ok := n.(*ast.Ident)
	return ok && id.Name == name && id.Obj == nil
}

// renameTop renames all references to the top-level name old.
// It reports whether it makes any changes.
func renameTop(f *ast.File, old, new string) bool {
	var fixed bool

	// Rename any conflicting imports
	// (assuming package name is last element of path).
	for _, s := range f.Imports {
		if s.Name != nil {
			if s.Name.Name == old {
				s.Name.Name = new
				fixed = true
			}
		} else {
			_, thisName := path.Split(importPath(s))
			if thisName == old {
				s.Name = ast.NewIdent(new)
				fixed = true
			}
		}
	}

	// Rename any top-level declarations.
	for _, d := range f.Decls {
		switch d := d.(type) {
		case *ast.FuncDecl:
			if d.Recv == nil && d.Name.Name == old {
				d.Name.Name = new
				d.Name.Obj.Name = new
				fixed = true
			}
		case *ast.GenDecl:
for _, s := range d.Specs {
|
||||
switch s := s.(type) {
|
||||
case *ast.TypeSpec:
|
||||
if s.Name.Name == old {
|
||||
s.Name.Name = new
|
||||
s.Name.Obj.Name = new
|
||||
fixed = true
|
||||
}
|
||||
case *ast.ValueSpec:
|
||||
for _, n := range s.Names {
|
||||
if n.Name == old {
|
||||
n.Name = new
|
||||
n.Obj.Name = new
|
||||
fixed = true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Rename top-level old to new, both unresolved names
|
||||
// (probably defined in another file) and names that resolve
|
||||
// to a declaration we renamed.
|
||||
walk(f, func(n any) {
|
||||
id, ok := n.(*ast.Ident)
|
||||
if ok && isTopName(id, old) {
|
||||
id.Name = new
|
||||
fixed = true
|
||||
}
|
||||
if ok && id.Obj != nil && id.Name == old && id.Obj.Name == new {
|
||||
id.Name = id.Obj.Name
|
||||
fixed = true
|
||||
}
|
||||
})
|
||||
|
||||
return fixed
|
||||
}
|
||||
|
||||
// matchLen returns the length of the longest prefix shared by x and y.
|
||||
func matchLen(x, y string) int {
|
||||
i := 0
|
||||
for i < len(x) && i < len(y) && x[i] == y[i] {
|
||||
i++
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
// addImport adds the import path to the file f, if absent.
|
||||
func addImport(f *ast.File, ipath string) (added bool) {
|
||||
if imports(f, ipath) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Determine name of import.
|
||||
// Assume added imports follow convention of using last element.
|
||||
_, name := path.Split(ipath)
|
||||
|
||||
// Rename any conflicting top-level references from name to name_.
|
||||
renameTop(f, name, name+"_")
|
||||
|
||||
newImport := &ast.ImportSpec{
|
||||
Path: &ast.BasicLit{
|
||||
Kind: token.STRING,
|
||||
Value: strconv.Quote(ipath),
|
||||
},
|
||||
}
|
||||
|
||||
// Find an import decl to add to.
|
||||
var (
|
||||
bestMatch = -1
|
||||
lastImport = -1
|
||||
impDecl *ast.GenDecl
|
||||
impIndex = -1
|
||||
)
|
||||
for i, decl := range f.Decls {
|
||||
gen, ok := decl.(*ast.GenDecl)
|
||||
if ok && gen.Tok == token.IMPORT {
|
||||
lastImport = i
|
||||
// Do not add to import "C", to avoid disrupting the
|
||||
// association with its doc comment, breaking cgo.
|
||||
if declImports(gen, "C") {
|
||||
continue
|
||||
}
|
||||
|
||||
// Compute longest shared prefix with imports in this block.
|
||||
for j, spec := range gen.Specs {
|
||||
impspec := spec.(*ast.ImportSpec)
|
||||
n := matchLen(importPath(impspec), ipath)
|
||||
if n > bestMatch {
|
||||
bestMatch = n
|
||||
impDecl = gen
|
||||
impIndex = j
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If no import decl found, add one after the last import.
|
||||
if impDecl == nil {
|
||||
impDecl = &ast.GenDecl{
|
||||
Tok: token.IMPORT,
|
||||
}
|
||||
f.Decls = append(f.Decls, nil)
|
||||
copy(f.Decls[lastImport+2:], f.Decls[lastImport+1:])
|
||||
f.Decls[lastImport+1] = impDecl
|
||||
}
|
||||
|
||||
// Ensure the import decl has parentheses, if needed.
|
||||
if len(impDecl.Specs) > 0 && !impDecl.Lparen.IsValid() {
|
||||
impDecl.Lparen = impDecl.Pos()
|
||||
}
|
||||
|
||||
insertAt := impIndex + 1
|
||||
if insertAt == 0 {
|
||||
insertAt = len(impDecl.Specs)
|
||||
}
|
||||
impDecl.Specs = append(impDecl.Specs, nil)
|
||||
copy(impDecl.Specs[insertAt+1:], impDecl.Specs[insertAt:])
|
||||
impDecl.Specs[insertAt] = newImport
|
||||
if insertAt > 0 {
|
||||
// Assign same position as the previous import,
|
||||
// so that the sorter sees it as being in the same block.
|
||||
prev := impDecl.Specs[insertAt-1]
|
||||
newImport.Path.ValuePos = prev.Pos()
|
||||
newImport.EndPos = prev.Pos()
|
||||
}
|
||||
|
||||
f.Imports = append(f.Imports, newImport)
|
||||
return true
|
||||
}
|
||||
|
||||
// deleteImport deletes the import path from the file f, if present.
|
||||
func deleteImport(f *ast.File, path string) (deleted bool) {
|
||||
oldImport := importSpec(f, path)
|
||||
|
||||
// Find the import node that imports path, if any.
|
||||
for i, decl := range f.Decls {
|
||||
gen, ok := decl.(*ast.GenDecl)
|
||||
if !ok || gen.Tok != token.IMPORT {
|
||||
continue
|
||||
}
|
||||
for j, spec := range gen.Specs {
|
||||
impspec := spec.(*ast.ImportSpec)
|
||||
if oldImport != impspec {
|
||||
continue
|
||||
}
|
||||
|
||||
// We found an import spec that imports path.
|
||||
// Delete it.
|
||||
deleted = true
|
||||
copy(gen.Specs[j:], gen.Specs[j+1:])
|
||||
gen.Specs = gen.Specs[:len(gen.Specs)-1]
|
||||
|
||||
// If this was the last import spec in this decl,
|
||||
// delete the decl, too.
|
||||
if len(gen.Specs) == 0 {
|
||||
copy(f.Decls[i:], f.Decls[i+1:])
|
||||
f.Decls = f.Decls[:len(f.Decls)-1]
|
||||
} else if len(gen.Specs) == 1 {
|
||||
gen.Lparen = token.NoPos // drop parens
|
||||
}
|
||||
if j > 0 {
|
||||
// We deleted an entry but now there will be
|
||||
// a blank line-sized hole where the import was.
|
||||
// Close the hole by making the previous
|
||||
// import appear to "end" where this one did.
|
||||
gen.Specs[j-1].(*ast.ImportSpec).EndPos = impspec.End()
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Delete it from f.Imports.
|
||||
for i, imp := range f.Imports {
|
||||
if imp == oldImport {
|
||||
copy(f.Imports[i:], f.Imports[i+1:])
|
||||
f.Imports = f.Imports[:len(f.Imports)-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// rewriteImport rewrites any import of path oldPath to path newPath.
|
||||
func rewriteImport(f *ast.File, oldPath, newPath string) (rewrote bool) {
|
||||
for _, imp := range f.Imports {
|
||||
if importPath(imp) == oldPath {
|
||||
rewrote = true
|
||||
// record old End, because the default is to compute
|
||||
// it using the length of imp.Path.Value.
|
||||
imp.EndPos = imp.End()
|
||||
imp.Path.Value = strconv.Quote(newPath)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
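The import helpers above compose into small AST rewrites: addImport inserts into the import block whose paths share the longest prefix with the new path, and it calls renameTop first so an existing top-level name cannot collide with the new package name. A minimal sketch of that flow, assuming the helpers above are in scope in the same package (the file name "in.go", the example source, and the function name example are illustrative only, not part of this change):

	// example parses a file with a conflicting top-level name,
	// adds an import, and prints the gofmt-formatted result.
	func example() error {
		src := "package main\n\nvar os = 1 // addImport renames this to os_\n"
		f, err := parser.ParseFile(fset, "in.go", src, parser.ParseComments)
		if err != nil {
			return err
		}
		addImport(f, "os") // renameTop("os", "os_") runs first, then the import is inserted
		out, err := gofmtFile(f)
		if err != nil {
			return err
		}
		_, err = os.Stdout.Write(out)
		return err
	}
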
@@ -1,16 +0,0 @@
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(gotypesFix)
}

var gotypesFix = fix{
	name: "gotypes",
	date: "2015-07-16",
	f:    noop,
	desc: `Change imports of golang.org/x/tools/go/{exact,types} to go/{constant,types} (removed)`,
}

@@ -1,458 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import "go/ast"

func init() {
	addTestCases(importTests, nil)
}

var importTests = []testCase{
	{
		Name: "import.0",
		Fn:   addImportFn("os"),
		In: `package main

import (
	"os"
)
`,
		Out: `package main

import (
	"os"
)
`,
	},
	{
		Name: "import.1",
		Fn:   addImportFn("os"),
		In: `package main
`,
		Out: `package main

import "os"
`,
	},
	{
		Name: "import.2",
		Fn:   addImportFn("os"),
		In: `package main

// Comment
import "C"
`,
		Out: `package main

// Comment
import "C"
import "os"
`,
	},
	{
		Name: "import.3",
		Fn:   addImportFn("os"),
		In: `package main

// Comment
import "C"

import (
	"io"
	"utf8"
)
`,
		Out: `package main

// Comment
import "C"

import (
	"io"
	"os"
	"utf8"
)
`,
	},
	{
		Name: "import.4",
		Fn:   deleteImportFn("os"),
		In: `package main

import (
	"os"
)
`,
		Out: `package main
`,
	},
	{
		Name: "import.5",
		Fn:   deleteImportFn("os"),
		In: `package main

// Comment
import "C"
import "os"
`,
		Out: `package main

// Comment
import "C"
`,
	},
	{
		Name: "import.6",
		Fn:   deleteImportFn("os"),
		In: `package main

// Comment
import "C"

import (
	"io"
	"os"
	"utf8"
)
`,
		Out: `package main

// Comment
import "C"

import (
	"io"
	"utf8"
)
`,
	},
	{
		Name: "import.7",
		Fn:   deleteImportFn("io"),
		In: `package main

import (
	"io" // a
	"os" // b
	"utf8" // c
)
`,
		Out: `package main

import (
	// a
	"os" // b
	"utf8" // c
)
`,
	},
	{
		Name: "import.8",
		Fn:   deleteImportFn("os"),
		In: `package main

import (
	"io" // a
	"os" // b
	"utf8" // c
)
`,
		Out: `package main

import (
	"io" // a
	// b
	"utf8" // c
)
`,
	},
	{
		Name: "import.9",
		Fn:   deleteImportFn("utf8"),
		In: `package main

import (
	"io" // a
	"os" // b
	"utf8" // c
)
`,
		Out: `package main

import (
	"io" // a
	"os" // b
	// c
)
`,
	},
	{
		Name: "import.10",
		Fn:   deleteImportFn("io"),
		In: `package main

import (
	"io"
	"os"
	"utf8"
)
`,
		Out: `package main

import (
	"os"
	"utf8"
)
`,
	},
	{
		Name: "import.11",
		Fn:   deleteImportFn("os"),
		In: `package main

import (
	"io"
	"os"
	"utf8"
)
`,
		Out: `package main

import (
	"io"
	"utf8"
)
`,
	},
	{
		Name: "import.12",
		Fn:   deleteImportFn("utf8"),
		In: `package main

import (
	"io"
	"os"
	"utf8"
)
`,
		Out: `package main

import (
	"io"
	"os"
)
`,
	},
	{
		Name: "import.13",
		Fn:   rewriteImportFn("utf8", "encoding/utf8"),
		In: `package main

import (
	"io"
	"os"
	"utf8" // thanks ken
)
`,
		Out: `package main

import (
	"encoding/utf8" // thanks ken
	"io"
	"os"
)
`,
	},
	{
		Name: "import.14",
		Fn:   rewriteImportFn("asn1", "encoding/asn1"),
		In: `package main

import (
	"asn1"
	"crypto"
	"crypto/rsa"
	_ "crypto/sha1"
	"crypto/x509"
	"crypto/x509/pkix"
	"time"
)

var x = 1
`,
		Out: `package main

import (
	"crypto"
	"crypto/rsa"
	_ "crypto/sha1"
	"crypto/x509"
	"crypto/x509/pkix"
	"encoding/asn1"
	"time"
)

var x = 1
`,
	},
	{
		Name: "import.15",
		Fn:   rewriteImportFn("url", "net/url"),
		In: `package main

import (
	"bufio"
	"net"
	"path"
	"url"
)

var x = 1 // comment on x, not on url
`,
		Out: `package main

import (
	"bufio"
	"net"
	"net/url"
	"path"
)

var x = 1 // comment on x, not on url
`,
	},
	{
		Name: "import.16",
		Fn:   rewriteImportFn("http", "net/http", "template", "text/template"),
		In: `package main

import (
	"flag"
	"http"
	"log"
	"template"
)

var addr = flag.String("addr", ":1718", "http service address") // Q=17, R=18
`,
		Out: `package main

import (
	"flag"
	"log"
	"net/http"
	"text/template"
)

var addr = flag.String("addr", ":1718", "http service address") // Q=17, R=18
`,
	},
	{
		Name: "import.17",
		Fn:   addImportFn("x/y/z", "x/a/c"),
		In: `package main

// Comment
import "C"

import (
	"a"
	"b"

	"x/w"

	"d/f"
)
`,
		Out: `package main

// Comment
import "C"

import (
	"a"
	"b"

	"x/a/c"
	"x/w"
	"x/y/z"

	"d/f"
)
`,
	},
	{
		Name: "import.18",
		Fn:   addDelImportFn("e", "o"),
		In: `package main

import (
	"f"
	"o"
	"z"
)
`,
		Out: `package main

import (
	"e"
	"f"
	"z"
)
`,
	},
}

func addImportFn(path ...string) func(*ast.File) bool {
	return func(f *ast.File) bool {
		fixed := false
		for _, p := range path {
			if !imports(f, p) {
				addImport(f, p)
				fixed = true
			}
		}
		return fixed
	}
}

func deleteImportFn(path string) func(*ast.File) bool {
	return func(f *ast.File) bool {
		if imports(f, path) {
			deleteImport(f, path)
			return true
		}
		return false
	}
}

func addDelImportFn(p1 string, p2 string) func(*ast.File) bool {
	return func(f *ast.File) bool {
		fixed := false
		if !imports(f, p1) {
			addImport(f, p1)
			fixed = true
		}
		if imports(f, p2) {
			deleteImport(f, p2)
			fixed = true
		}
		return fixed
	}
}

func rewriteImportFn(oldnew ...string) func(*ast.File) bool {
	return func(f *ast.File) bool {
		fixed := false
		for i := 0; i < len(oldnew); i += 2 {
			if imports(f, oldnew[i]) {
				rewriteImport(f, oldnew[i], oldnew[i+1])
				fixed = true
			}
		}
		return fixed
	}
}

@@ -1,17 +0,0 @@
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(jniFix)
}

var jniFix = fix{
	name:     "jni",
	date:     "2017-12-04",
	f:        noop,
	desc:     `Fixes initializers of JNI's jobject and subtypes (removed)`,
	disabled: false,
}

@@ -5,261 +5,27 @@
package main

import (
	"bytes"
	"flag"
	"fmt"
	"go/ast"
	"go/format"
	"go/parser"
	"go/scanner"
	"go/token"
	"go/version"
	"internal/diff"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"slices"
	"strings"

	"cmd/internal/telemetry/counter"
)

var (
	fset     = token.NewFileSet()
	exitCode = 0
	_        = flag.Bool("diff", false, "obsolete, no effect")
	_        = flag.String("go", "", "obsolete, no effect")
	_        = flag.String("r", "", "obsolete, no effect")
	_        = flag.String("force", "", "obsolete, no effect")
)

var allowedRewrites = flag.String("r", "",
	"restrict the rewrites to this comma-separated list")

var forceRewrites = flag.String("force", "",
	"force these fixes to run even if the code looks updated")

var allowed, force map[string]bool

var (
	doDiff    = flag.Bool("diff", false, "display diffs instead of rewriting files")
	goVersion = flag.String("go", "", "go language version for files")
)

// enable for debugging fix failures
const debug = false // display incorrectly reformatted source and exit

func usage() {
	fmt.Fprintf(os.Stderr, "usage: go tool fix [-diff] [-r fixname,...] [-force fixname,...] [path ...]\n")
	fmt.Fprintf(os.Stderr, "usage: go tool fix [-diff] [-r ignored] [-force ignored] ...\n")
	flag.PrintDefaults()
	fmt.Fprintf(os.Stderr, "\nAvailable rewrites are:\n")
	slices.SortFunc(fixes, func(a, b fix) int {
		return strings.Compare(a.name, b.name)
	})
	for _, f := range fixes {
		if f.disabled {
			fmt.Fprintf(os.Stderr, "\n%s (disabled)\n", f.name)
		} else {
			fmt.Fprintf(os.Stderr, "\n%s\n", f.name)
		}
		desc := strings.TrimSpace(f.desc)
		desc = strings.ReplaceAll(desc, "\n", "\n\t")
		fmt.Fprintf(os.Stderr, "\t%s\n", desc)
	}
	os.Exit(2)
}

func main() {
	counter.Open()
	flag.Usage = usage
	flag.Parse()
	counter.Inc("fix/invocations")
	counter.CountFlags("fix/flag:", *flag.CommandLine)

	if !version.IsValid(*goVersion) {
		report(fmt.Errorf("invalid -go=%s", *goVersion))
		os.Exit(exitCode)
	}

	slices.SortFunc(fixes, func(a, b fix) int {
		return strings.Compare(a.date, b.date)
	})

	if *allowedRewrites != "" {
		allowed = make(map[string]bool)
		for f := range strings.SplitSeq(*allowedRewrites, ",") {
			allowed[f] = true
		}
	}

	if *forceRewrites != "" {
		force = make(map[string]bool)
		for f := range strings.SplitSeq(*forceRewrites, ",") {
			force[f] = true
		}
	}

	if flag.NArg() == 0 {
		if err := processFile("standard input", true); err != nil {
			report(err)
		}
		os.Exit(exitCode)
	}

	for i := 0; i < flag.NArg(); i++ {
		path := flag.Arg(i)
		switch dir, err := os.Stat(path); {
		case err != nil:
			report(err)
		case dir.IsDir():
			walkDir(path)
		default:
			if err := processFile(path, false); err != nil {
				report(err)
			}
		}
	}

	os.Exit(exitCode)
}

const parserMode = parser.ParseComments

func gofmtFile(f *ast.File) ([]byte, error) {
	var buf bytes.Buffer
	if err := format.Node(&buf, fset, f); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

func processFile(filename string, useStdin bool) error {
	var f *os.File
	var err error
	var fixlog strings.Builder

	if useStdin {
		f = os.Stdin
	} else {
		f, err = os.Open(filename)
		if err != nil {
			return err
		}
		defer f.Close()
	}

	src, err := io.ReadAll(f)
	if err != nil {
		return err
	}

	file, err := parser.ParseFile(fset, filename, src, parserMode)
	if err != nil {
		return err
	}

	// Make sure file is in canonical format.
	// This "fmt" pseudo-fix cannot be disabled.
	newSrc, err := gofmtFile(file)
	if err != nil {
		return err
	}
	if !bytes.Equal(newSrc, src) {
		newFile, err := parser.ParseFile(fset, filename, newSrc, parserMode)
		if err != nil {
			return err
		}
		file = newFile
		fmt.Fprintf(&fixlog, " fmt")
	}

	// Apply all fixes to file.
	newFile := file
	fixed := false
	for _, fix := range fixes {
		if allowed != nil && !allowed[fix.name] {
			continue
		}
		if fix.disabled && !force[fix.name] {
			continue
		}
		if fix.f(newFile) {
			fixed = true
			fmt.Fprintf(&fixlog, " %s", fix.name)

			// AST changed.
			// Print and parse, to update any missing scoping
			// or position information for subsequent fixers.
			newSrc, err := gofmtFile(newFile)
			if err != nil {
				return err
			}
			newFile, err = parser.ParseFile(fset, filename, newSrc, parserMode)
			if err != nil {
				if debug {
					fmt.Printf("%s", newSrc)
					report(err)
					os.Exit(exitCode)
				}
				return err
			}
		}
	}
	if !fixed {
		return nil
	}
	fmt.Fprintf(os.Stderr, "%s: fixed %s\n", filename, fixlog.String()[1:])

	// Print AST. We did that after each fix, so this appears
	// redundant, but it is necessary to generate gofmt-compatible
	// source code in a few cases. The official gofmt style is the
	// output of the printer run on a standard AST generated by the parser,
	// but the source we generated inside the loop above is the
	// output of the printer run on a mangled AST generated by a fixer.
	newSrc, err = gofmtFile(newFile)
	if err != nil {
		return err
	}

	if *doDiff {
		os.Stdout.Write(diff.Diff(filename, src, "fixed/"+filename, newSrc))
		return nil
	}

	if useStdin {
		os.Stdout.Write(newSrc)
		return nil
	}

	return os.WriteFile(f.Name(), newSrc, 0)
}

func gofmt(n any) string {
	var gofmtBuf strings.Builder
	if err := format.Node(&gofmtBuf, fset, n); err != nil {
		return "<" + err.Error() + ">"
	}
	return gofmtBuf.String()
}

func report(err error) {
	scanner.PrintError(os.Stderr, err)
	exitCode = 2
}

func walkDir(path string) {
	filepath.WalkDir(path, visitFile)
}

func visitFile(path string, f fs.DirEntry, err error) error {
	if err == nil && isGoFile(f) {
		err = processFile(path, false)
	}
	if err != nil {
		report(err)
	}
	return nil
}

func isGoFile(f fs.DirEntry) bool {
	// ignore non-Go files
	name := f.Name()
	return !f.IsDir() && !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".go")
}

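The loop in processFile above re-prints and re-parses the file after every successful fix so that subsequent fixers see fresh position and scope information. The same idea in miniature, assuming the gofmtFile, fset, parserMode, and fix names defined above (applyFixes is an illustrative name, not part of this change):

	// applyFixes runs each fixer, round-tripping through source after
	// every change so positions and scopes stay consistent.
	func applyFixes(filename string, file *ast.File, all []fix) (*ast.File, error) {
		for _, fx := range all {
			if !fx.f(file) {
				continue // fixer made no change
			}
			src, err := gofmtFile(file)
			if err != nil {
				return nil, err
			}
			file, err = parser.ParseFile(fset, filename, src, parserMode)
			if err != nil {
				return nil, err
			}
		}
		return file, nil
	}
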
@@ -1,166 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"go/ast"
	"go/parser"
	"internal/diff"
	"internal/testenv"
	"strings"
	"testing"
)

type testCase struct {
	Name    string
	Fn      func(*ast.File) bool
	Version string
	In      string
	Out     string
}

var testCases []testCase

func addTestCases(t []testCase, fn func(*ast.File) bool) {
	// Fill in fn to avoid repetition in definitions.
	if fn != nil {
		for i := range t {
			if t[i].Fn == nil {
				t[i].Fn = fn
			}
		}
	}
	testCases = append(testCases, t...)
}

func fnop(*ast.File) bool { return false }

func parseFixPrint(t *testing.T, fn func(*ast.File) bool, desc, in string, mustBeGofmt bool) (out string, fixed, ok bool) {
	file, err := parser.ParseFile(fset, desc, in, parserMode)
	if err != nil {
		t.Errorf("parsing: %v", err)
		return
	}

	outb, err := gofmtFile(file)
	if err != nil {
		t.Errorf("printing: %v", err)
		return
	}
	if s := string(outb); in != s && mustBeGofmt {
		t.Errorf("not gofmt-formatted.\n--- %s\n%s\n--- %s | gofmt\n%s",
			desc, in, desc, s)
		tdiff(t, "want", in, "have", s)
		return
	}

	if fn == nil {
		for _, fix := range fixes {
			if fix.f(file) {
				fixed = true
			}
		}
	} else {
		fixed = fn(file)
	}

	outb, err = gofmtFile(file)
	if err != nil {
		t.Errorf("printing: %v", err)
		return
	}

	return string(outb), fixed, true
}

func TestRewrite(t *testing.T) {
	// If cgo is enabled, enforce that cgo commands invoked by cmd/fix
	// do not fail during testing.
	if testenv.HasCGO() {
		testenv.MustHaveGoBuild(t) // Really just 'go tool cgo', but close enough.

		// The reportCgoError hook is global, so we can't set it per-test
		// if we want to be able to run those tests in parallel.
		// Instead, simply set it to panic on error: the goroutine dump
		// from the panic should help us determine which test failed.
		prevReportCgoError := reportCgoError
		reportCgoError = func(err error) {
			panic(fmt.Sprintf("unexpected cgo error: %v", err))
		}
		t.Cleanup(func() { reportCgoError = prevReportCgoError })
	}

	for _, tt := range testCases {
		tt := tt
		t.Run(tt.Name, func(t *testing.T) {
			if tt.Version == "" {
				if testing.Verbose() {
					// Don't run in parallel: cmd/fix sometimes writes directly to stderr,
					// and since -v prints which test is currently running we want that
					// information to accurately correlate with the stderr output.
				} else {
					t.Parallel()
				}
			} else {
				old := *goVersion
				*goVersion = tt.Version
				defer func() {
					*goVersion = old
				}()
			}

			// Apply fix: should get tt.Out.
			out, fixed, ok := parseFixPrint(t, tt.Fn, tt.Name, tt.In, true)
			if !ok {
				return
			}

			// reformat to get printing right
			out, _, ok = parseFixPrint(t, fnop, tt.Name, out, false)
			if !ok {
				return
			}

			if tt.Out == "" {
				tt.Out = tt.In
			}
			if out != tt.Out {
				t.Errorf("incorrect output.\n")
				if !strings.HasPrefix(tt.Name, "testdata/") {
					t.Errorf("--- have\n%s\n--- want\n%s", out, tt.Out)
				}
				tdiff(t, "have", out, "want", tt.Out)
				return
			}

			if changed := out != tt.In; changed != fixed {
				t.Errorf("changed=%v != fixed=%v", changed, fixed)
				return
			}

			// Should not change if run again.
			out2, fixed2, ok := parseFixPrint(t, tt.Fn, tt.Name+" output", out, true)
			if !ok {
				return
			}

			if fixed2 {
				t.Errorf("applied fixes during second round")
				return
			}

			if out2 != out {
				t.Errorf("changed output after second round of fixes.\n--- output after first round\n%s\n--- output after second round\n%s",
					out, out2)
				tdiff(t, "first", out, "second", out2)
			}
		})
	}
}

func tdiff(t *testing.T, aname, a, bname, b string) {
	t.Errorf("%s", diff.Diff(aname, []byte(a), bname, []byte(b)))
}

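TestRewrite above checks two properties of every fix: correctness (the input maps to the expected output) and idempotence (running the fix on its own output changes nothing). A hedged sketch of the idempotence check in isolation, assuming parseFixPrint above (checkIdempotent is an illustrative name):

	// checkIdempotent asserts that applying fn a second time is a no-op.
	func checkIdempotent(t *testing.T, fn func(*ast.File) bool, name, in string) {
		out1, _, ok := parseFixPrint(t, fn, name, in, true)
		if !ok {
			return
		}
		out2, fixed2, ok := parseFixPrint(t, fn, name+" output", out1, true)
		if !ok {
			return
		}
		if fixed2 || out2 != out1 {
			t.Errorf("fix is not idempotent: second run changed the output")
		}
	}
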
@@ -1,19 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(netipv6zoneFix)
}

var netipv6zoneFix = fix{
	name: "netipv6zone",
	date: "2012-11-26",
	f:    noop,
	desc: `Adapt element key to IPAddr, UDPAddr or TCPAddr composite literals (removed).

https://codereview.appspot.com/6849045/
`,
}

@@ -1,16 +0,0 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

func init() {
	register(printerconfigFix)
}

var printerconfigFix = fix{
	name: "printerconfig",
	date: "2012-12-11",
	f:    noop,
	desc: `Add element keys to Config composite literals (removed).`,
}

@@ -1,814 +0,0 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"go/ast"
	"go/parser"
	"go/token"
	"maps"
	"os"
	"os/exec"
	"path/filepath"
	"reflect"
	"runtime"
	"strings"
)

// Partial type checker.
//
// The fact that it is partial is very important: the input is
// an AST and a description of some type information to
// assume about one or more packages, but not all the
// packages that the program imports. The checker is
// expected to do as much as it can with what it has been
// given. There is not enough information supplied to do
// a full type check, but the type checker is expected to
// apply information that can be derived from variable
// declarations, function and method returns, and type switches
// as far as it can, so that the caller can still tell the types
// of expression relevant to a particular fix.
//
// TODO(rsc,gri): Replace with go/typechecker.
// Doing that could be an interesting test case for go/typechecker:
// the constraints about working with partial information will
// likely exercise it in interesting ways. The ideal interface would
// be to pass typecheck a map from importpath to package API text
// (Go source code), but for now we use data structures (TypeConfig, Type).
//
// The strings mostly use gofmt form.
//
// A Field or FieldList has as its type a comma-separated list
// of the types of the fields. For example, the field list
//	x, y, z int
// has type "int, int, int".

// The prefix "type " is the type of a type.
// For example, given
//	var x int
//	type T int
// x's type is "int" but T's type is "type int".
// mkType inserts the "type " prefix.
// getType removes it.
// isType tests for it.

func mkType(t string) string {
	return "type " + t
}

func getType(t string) string {
	if !isType(t) {
		return ""
	}
	return t[len("type "):]
}

func isType(t string) bool {
	return strings.HasPrefix(t, "type ")
}

// TypeConfig describes the universe of relevant types.
// For ease of creation, the types are all referred to by string
// name (e.g., "reflect.Value"). TypeByName is the only place
// where the strings are resolved.

type TypeConfig struct {
	Type map[string]*Type
	Var  map[string]string
	Func map[string]string

	// External maps from a name to its type.
	// It provides additional typings not present in the Go source itself.
	// For now, the only additional typings are those generated by cgo.
	External map[string]string
}

// typeof returns the type of the given name, which may be of
// the form "x" or "p.X".
func (cfg *TypeConfig) typeof(name string) string {
	if cfg.Var != nil {
		if t := cfg.Var[name]; t != "" {
			return t
		}
	}
	if cfg.Func != nil {
		if t := cfg.Func[name]; t != "" {
			return "func()" + t
		}
	}
	return ""
}

// Type describes the Fields and Methods of a type.
// If the field or method cannot be found there, it is next
// looked for in the Embed list.
type Type struct {
	Field  map[string]string // map field name to type
	Method map[string]string // map method name to comma-separated return types (should start with "func ")
	Embed  []string          // list of types this type embeds (for extra methods)
	Def    string            // definition of named type
}

// dot returns the type of "typ.name", making its decision
// using the type information in cfg.
func (typ *Type) dot(cfg *TypeConfig, name string) string {
	if typ.Field != nil {
		if t := typ.Field[name]; t != "" {
			return t
		}
	}
	if typ.Method != nil {
		if t := typ.Method[name]; t != "" {
			return t
		}
	}

	for _, e := range typ.Embed {
		etyp := cfg.Type[e]
		if etyp != nil {
			if t := etyp.dot(cfg, name); t != "" {
				return t
			}
		}
	}

	return ""
}

// typecheck type checks the AST f assuming the information in cfg.
// It returns two maps with type information:
// typeof maps AST nodes to type information in gofmt string form.
// assign maps type strings to lists of expressions that were assigned
// to values of another type that were assigned to that type.
func typecheck(cfg *TypeConfig, f *ast.File) (typeof map[any]string, assign map[string][]any) {
	typeof = make(map[any]string)
	assign = make(map[string][]any)
	cfg1 := &TypeConfig{}
	*cfg1 = *cfg // make copy so we can add locally
	copied := false

	// If we import "C", add types of cgo objects.
	cfg.External = map[string]string{}
	cfg1.External = cfg.External
	if imports(f, "C") {
		// Run cgo on gofmtFile(f)
		// Parse, extract decls from _cgo_gotypes.go
		// Map _Ctype_* types to C.* types.
		err := func() error {
			txt, err := gofmtFile(f)
			if err != nil {
				return err
			}
			dir, err := os.MkdirTemp(os.TempDir(), "fix_cgo_typecheck")
			if err != nil {
				return err
			}
			defer os.RemoveAll(dir)
			err = os.WriteFile(filepath.Join(dir, "in.go"), txt, 0600)
			if err != nil {
				return err
			}
			goCmd := "go"
			if goroot := runtime.GOROOT(); goroot != "" {
				goCmd = filepath.Join(goroot, "bin", "go")
			}
			cmd := exec.Command(goCmd, "tool", "cgo", "-objdir", dir, "-srcdir", dir, "in.go")
			if reportCgoError != nil {
				// Since cgo command errors will be reported, also forward the error
				// output from the command for debugging.
				cmd.Stderr = os.Stderr
			}
			err = cmd.Run()
			if err != nil {
				return err
			}
			out, err := os.ReadFile(filepath.Join(dir, "_cgo_gotypes.go"))
			if err != nil {
				return err
			}
			cgo, err := parser.ParseFile(token.NewFileSet(), "cgo.go", out, 0)
			if err != nil {
				return err
			}
			for _, decl := range cgo.Decls {
				fn, ok := decl.(*ast.FuncDecl)
				if !ok {
					continue
				}
				if strings.HasPrefix(fn.Name.Name, "_Cfunc_") {
					var params, results []string
					for _, p := range fn.Type.Params.List {
						t := gofmt(p.Type)
						t = strings.ReplaceAll(t, "_Ctype_", "C.")
						params = append(params, t)
					}
					for _, r := range fn.Type.Results.List {
						t := gofmt(r.Type)
						t = strings.ReplaceAll(t, "_Ctype_", "C.")
						results = append(results, t)
					}
					cfg.External["C."+fn.Name.Name[7:]] = joinFunc(params, results)
				}
			}
			return nil
		}()
		if err != nil {
			if reportCgoError == nil {
				fmt.Fprintf(os.Stderr, "go fix: warning: no cgo types: %s\n", err)
			} else {
				reportCgoError(err)
			}
		}
	}

	// gather function declarations
	for _, decl := range f.Decls {
		fn, ok := decl.(*ast.FuncDecl)
		if !ok {
			continue
		}
		typecheck1(cfg, fn.Type, typeof, assign)
		t := typeof[fn.Type]
		if fn.Recv != nil {
			// The receiver must be a type.
			rcvr := typeof[fn.Recv]
			if !isType(rcvr) {
				if len(fn.Recv.List) != 1 {
					continue
				}
				rcvr = mkType(gofmt(fn.Recv.List[0].Type))
				typeof[fn.Recv.List[0].Type] = rcvr
			}
			rcvr = getType(rcvr)
			if rcvr != "" && rcvr[0] == '*' {
				rcvr = rcvr[1:]
			}
			typeof[rcvr+"."+fn.Name.Name] = t
		} else {
			if isType(t) {
				t = getType(t)
			} else {
				t = gofmt(fn.Type)
			}
			typeof[fn.Name] = t

			// Record typeof[fn.Name.Obj] for future references to fn.Name.
			typeof[fn.Name.Obj] = t
		}
	}

	// gather struct declarations
	for _, decl := range f.Decls {
		d, ok := decl.(*ast.GenDecl)
		if ok {
			for _, s := range d.Specs {
				switch s := s.(type) {
				case *ast.TypeSpec:
					if cfg1.Type[s.Name.Name] != nil {
						break
					}
					if !copied {
						copied = true
						// Copy map lazily: it's time.
						cfg1.Type = maps.Clone(cfg.Type)
						if cfg1.Type == nil {
							cfg1.Type = make(map[string]*Type)
						}
					}
					t := &Type{Field: map[string]string{}}
					cfg1.Type[s.Name.Name] = t
					switch st := s.Type.(type) {
					case *ast.StructType:
						for _, f := range st.Fields.List {
							for _, n := range f.Names {
								t.Field[n.Name] = gofmt(f.Type)
							}
						}
					case *ast.ArrayType, *ast.StarExpr, *ast.MapType:
						t.Def = gofmt(st)
					}
				}
			}
		}
	}

	typecheck1(cfg1, f, typeof, assign)
	return typeof, assign
}

// reportCgoError, if non-nil, reports a non-nil error from running the "cgo"
// tool. (Set to a non-nil hook during testing if cgo is expected to work.)
var reportCgoError func(err error)

func makeExprList(a []*ast.Ident) []ast.Expr {
	var b []ast.Expr
	for _, x := range a {
		b = append(b, x)
	}
	return b
}

// typecheck1 is the recursive form of typecheck.
// It is like typecheck but adds to the information in typeof
// instead of allocating a new map.
func typecheck1(cfg *TypeConfig, f any, typeof map[any]string, assign map[string][]any) {
	// set sets the type of n to typ.
	// If isDecl is true, n is being declared.
	set := func(n ast.Expr, typ string, isDecl bool) {
		if typeof[n] != "" || typ == "" {
			if typeof[n] != typ {
				assign[typ] = append(assign[typ], n)
			}
			return
		}
		typeof[n] = typ

		// If we obtained typ from the declaration of x
		// propagate the type to all the uses.
		// The !isDecl case is a cheat here, but it makes
		// up in some cases for not paying attention to
		// struct fields. The real type checker will be
		// more accurate so we won't need the cheat.
		if id, ok := n.(*ast.Ident); ok && id.Obj != nil && (isDecl || typeof[id.Obj] == "") {
			typeof[id.Obj] = typ
		}
	}

	// Type-check an assignment lhs = rhs.
	// If isDecl is true, this is := so we can update
	// the types of the objects that lhs refers to.
	typecheckAssign := func(lhs, rhs []ast.Expr, isDecl bool) {
		if len(lhs) > 1 && len(rhs) == 1 {
			if _, ok := rhs[0].(*ast.CallExpr); ok {
				t := split(typeof[rhs[0]])
				// Lists should have same length but may not; pair what can be paired.
				for i := 0; i < len(lhs) && i < len(t); i++ {
					set(lhs[i], t[i], isDecl)
				}
				return
			}
		}
		if len(lhs) == 1 && len(rhs) == 2 {
			// x = y, ok
			rhs = rhs[:1]
		} else if len(lhs) == 2 && len(rhs) == 1 {
			// x, ok = y
			lhs = lhs[:1]
		}

		// Match as much as we can.
		for i := 0; i < len(lhs) && i < len(rhs); i++ {
			x, y := lhs[i], rhs[i]
			if typeof[y] != "" {
				set(x, typeof[y], isDecl)
			} else {
				set(y, typeof[x], false)
			}
		}
	}

	expand := func(s string) string {
		typ := cfg.Type[s]
		if typ != nil && typ.Def != "" {
			return typ.Def
		}
		return s
	}

	// The main type check is a recursive algorithm implemented
	// by walkBeforeAfter(n, before, after).
	// Most of it is bottom-up, but in a few places we need
	// to know the type of the function we are checking.
	// The before function records that information on
	// the curfn stack.
	var curfn []*ast.FuncType

	before := func(n any) {
		// push function type on stack
		switch n := n.(type) {
		case *ast.FuncDecl:
			curfn = append(curfn, n.Type)
		case *ast.FuncLit:
			curfn = append(curfn, n.Type)
		}
	}

	// After is the real type checker.
	after := func(n any) {
		if n == nil {
			return
		}
		if false && reflect.TypeOf(n).Kind() == reflect.Pointer { // debugging trace
			defer func() {
				if t := typeof[n]; t != "" {
					pos := fset.Position(n.(ast.Node).Pos())
					fmt.Fprintf(os.Stderr, "%s: typeof[%s] = %s\n", pos, gofmt(n), t)
				}
			}()
		}

		switch n := n.(type) {
		case *ast.FuncDecl, *ast.FuncLit:
			// pop function type off stack
			curfn = curfn[:len(curfn)-1]

		case *ast.FuncType:
			typeof[n] = mkType(joinFunc(split(typeof[n.Params]), split(typeof[n.Results])))

		case *ast.FieldList:
			// Field list is concatenation of sub-lists.
			t := ""
			for _, field := range n.List {
				if t != "" {
					t += ", "
				}
				t += typeof[field]
			}
			typeof[n] = t

		case *ast.Field:
			// Field is one instance of the type per name.
			all := ""
			t := typeof[n.Type]
			if !isType(t) {
				// Create a type, because it is typically *T or *p.T
				// and we might care about that type.
				t = mkType(gofmt(n.Type))
				typeof[n.Type] = t
			}
			t = getType(t)
			if len(n.Names) == 0 {
				all = t
			} else {
				for _, id := range n.Names {
					if all != "" {
						all += ", "
					}
					all += t
					typeof[id.Obj] = t
					typeof[id] = t
				}
			}
			typeof[n] = all

		case *ast.ValueSpec:
			// var declaration. Use type if present.
			if n.Type != nil {
				t := typeof[n.Type]
				if !isType(t) {
					t = mkType(gofmt(n.Type))
					typeof[n.Type] = t
				}
				t = getType(t)
				for _, id := range n.Names {
					set(id, t, true)
				}
			}
			// Now treat same as assignment.
			typecheckAssign(makeExprList(n.Names), n.Values, true)

		case *ast.AssignStmt:
			typecheckAssign(n.Lhs, n.Rhs, n.Tok == token.DEFINE)

		case *ast.Ident:
			// Identifier can take its type from underlying object.
			if t := typeof[n.Obj]; t != "" {
				typeof[n] = t
			}

		case *ast.SelectorExpr:
			// Field or method.
			name := n.Sel.Name
			if t := typeof[n.X]; t != "" {
				t = strings.TrimPrefix(t, "*") // implicit *
				if typ := cfg.Type[t]; typ != nil {
					if t := typ.dot(cfg, name); t != "" {
						typeof[n] = t
						return
					}
				}
				tt := typeof[t+"."+name]
				if isType(tt) {
					typeof[n] = getType(tt)
					return
				}
			}
			// Package selector.
			if x, ok := n.X.(*ast.Ident); ok && x.Obj == nil {
				str := x.Name + "." + name
				if cfg.Type[str] != nil {
					typeof[n] = mkType(str)
					return
				}
				if t := cfg.typeof(x.Name + "." + name); t != "" {
					typeof[n] = t
					return
				}
			}

		case *ast.CallExpr:
			// make(T) has type T.
			if isTopName(n.Fun, "make") && len(n.Args) >= 1 {
				typeof[n] = gofmt(n.Args[0])
				return
			}
			// new(T) has type *T
			if isTopName(n.Fun, "new") && len(n.Args) == 1 {
				typeof[n] = "*" + gofmt(n.Args[0])
				return
			}
			// Otherwise, use type of function to determine arguments.
			t := typeof[n.Fun]
			if t == "" {
				t = cfg.External[gofmt(n.Fun)]
			}
			in, out := splitFunc(t)
			if in == nil && out == nil {
				return
			}
			typeof[n] = join(out)
			for i, arg := range n.Args {
				if i >= len(in) {
					break
				}
				if typeof[arg] == "" {
					typeof[arg] = in[i]
				}
			}

		case *ast.TypeAssertExpr:
			// x.(type) has type of x.
			if n.Type == nil {
				typeof[n] = typeof[n.X]
				return
			}
			// x.(T) has type T.
			if t := typeof[n.Type]; isType(t) {
				typeof[n] = getType(t)
			} else {
				typeof[n] = gofmt(n.Type)
			}

		case *ast.SliceExpr:
			// x[i:j] has type of x.
			typeof[n] = typeof[n.X]

		case *ast.IndexExpr:
			// x[i] has key type of x's type.
			t := expand(typeof[n.X])
			if strings.HasPrefix(t, "[") || strings.HasPrefix(t, "map[") {
				// Lazy: assume there are no nested [] in the array
				// length or map key type.
				if _, elem, ok := strings.Cut(t, "]"); ok {
					typeof[n] = elem
				}
			}

		case *ast.StarExpr:
			// *x for x of type *T has type T when x is an expr.
			// We don't use the result when *x is a type, but
			// compute it anyway.
			t := expand(typeof[n.X])
			if isType(t) {
				typeof[n] = "type *" + getType(t)
			} else if strings.HasPrefix(t, "*") {
				typeof[n] = t[len("*"):]
			}

		case *ast.UnaryExpr:
			// &x for x of type T has type *T.
			t := typeof[n.X]
			if t != "" && n.Op == token.AND {
				typeof[n] = "*" + t
			}

		case *ast.CompositeLit:
			// T{...} has type T.
			typeof[n] = gofmt(n.Type)

			// Propagate types down to values used in the composite literal.
			t := expand(typeof[n])
			if strings.HasPrefix(t, "[") { // array or slice
				// Lazy: assume there are no nested [] in the array length.
				if _, et, ok := strings.Cut(t, "]"); ok {
					for _, e := range n.Elts {
						if kv, ok := e.(*ast.KeyValueExpr); ok {
							e = kv.Value
						}
						if typeof[e] == "" {
							typeof[e] = et
						}
					}
				}
			}
			if strings.HasPrefix(t, "map[") { // map
				// Lazy: assume there are no nested [] in the map key type.
				if kt, vt, ok := strings.Cut(t[len("map["):], "]"); ok {
					for _, e := range n.Elts {
						if kv, ok := e.(*ast.KeyValueExpr); ok {
							if typeof[kv.Key] == "" {
								typeof[kv.Key] = kt
							}
							if typeof[kv.Value] == "" {
								typeof[kv.Value] = vt
							}
						}
					}
				}
			}
			if typ := cfg.Type[t]; typ != nil && len(typ.Field) > 0 { // struct
				for _, e := range n.Elts {
					if kv, ok := e.(*ast.KeyValueExpr); ok {
						if ft := typ.Field[fmt.Sprintf("%s", kv.Key)]; ft != "" {
							if typeof[kv.Value] == "" {
								typeof[kv.Value] = ft
							}
						}
					}
				}
			}

		case *ast.ParenExpr:
			// (x) has type of x.
			typeof[n] = typeof[n.X]

		case *ast.RangeStmt:
			t := expand(typeof[n.X])
			if t == "" {
				return
			}
			var key, value string
			if t == "string" {
				key, value = "int", "rune"
			} else if strings.HasPrefix(t, "[") {
				key = "int"
				_, value, _ = strings.Cut(t, "]")
			} else if strings.HasPrefix(t, "map[") {
				if k, v, ok := strings.Cut(t[len("map["):], "]"); ok {
					key, value = k, v
				}
			}
			changed := false
			if n.Key != nil && key != "" {
				changed = true
				set(n.Key, key, n.Tok == token.DEFINE)
			}
			if n.Value != nil && value != "" {
				changed = true
				set(n.Value, value, n.Tok == token.DEFINE)
			}
			// Ugly failure of vision: already type-checked body.
			// Do it again now that we have that type info.
			if changed {
				typecheck1(cfg, n.Body, typeof, assign)
			}

		case *ast.TypeSwitchStmt:
			// Type of variable changes for each case in type switch,
			// but go/parser generates just one variable.
			// Repeat type check for each case with more precise
			// type information.
			as, ok := n.Assign.(*ast.AssignStmt)
			if !ok {
				return
			}
			varx, ok := as.Lhs[0].(*ast.Ident)
			if !ok {
				return
			}
			t := typeof[varx]
			for _, cas := range n.Body.List {
				cas := cas.(*ast.CaseClause)
				if len(cas.List) == 1 {
					// Variable has specific type only when there is
					// exactly one type in the case list.
					if tt := typeof[cas.List[0]]; isType(tt) {
						tt = getType(tt)
						typeof[varx] = tt
						typeof[varx.Obj] = tt
						typecheck1(cfg, cas.Body, typeof, assign)
					}
				}
			}
			// Restore t.
			typeof[varx] = t
			typeof[varx.Obj] = t

		case *ast.ReturnStmt:
			if len(curfn) == 0 {
				// Probably can't happen.
				return
			}
			f := curfn[len(curfn)-1]
			res := n.Results
			if f.Results != nil {
				t := split(typeof[f.Results])
				for i := 0; i < len(res) && i < len(t); i++ {
					set(res[i], t[i], false)
				}
			}

		case *ast.BinaryExpr:
			// Propagate types across binary ops that require two args of the same type.
			switch n.Op {
			case token.EQL, token.NEQ: // TODO: more cases. This is enough for the cftype fix.
				if typeof[n.X] != "" && typeof[n.Y] == "" {
					typeof[n.Y] = typeof[n.X]
				}
				if typeof[n.X] == "" && typeof[n.Y] != "" {
					typeof[n.X] = typeof[n.Y]
				}
			}
		}
	}
	walkBeforeAfter(f, before, after)
}

// Convert between function type strings and lists of types.
// Using strings makes this a little harder, but it makes
// a lot of the rest of the code easier. This will all go away
// when we can use go/typechecker directly.

// splitFunc splits "func(x,y,z) (a,b,c)" into ["x", "y", "z"] and ["a", "b", "c"].
func splitFunc(s string) (in, out []string) {
	if !strings.HasPrefix(s, "func(") {
		return nil, nil
	}

	i := len("func(") // index of beginning of 'in' arguments
	nparen := 0
	for j := i; j < len(s); j++ {
		switch s[j] {
		case '(':
			nparen++
		case ')':
			nparen--
			if nparen < 0 {
				// found end of parameter list
				out := strings.TrimSpace(s[j+1:])
				if len(out) >= 2 && out[0] == '(' && out[len(out)-1] == ')' {
					out = out[1 : len(out)-1]
				}
				return split(s[i:j]), split(out)
			}
		}
	}
	return nil, nil
}

// joinFunc is the inverse of splitFunc.
func joinFunc(in, out []string) string {
	outs := ""
	if len(out) == 1 {
		outs = " " + out[0]
	} else if len(out) > 1 {
		outs = " (" + join(out) + ")"
	}
	return "func(" + join(in) + ")" + outs
}

// split splits "int, float" into ["int", "float"] and splits "" into [].
func split(s string) []string {
	out := []string{}
	i := 0 // current type being scanned is s[i:j].
	nparen := 0
	for j := 0; j < len(s); j++ {
		switch s[j] {
		case ' ':
			if i == j {
				i++
			}
		case '(':
			nparen++
		case ')':
			nparen--
			if nparen < 0 {
				// probably can't happen
				return nil
			}
		case ',':
			if nparen == 0 {
				if i < j {
					out = append(out, s[i:j])
				}
				i = j + 1
			}
		}
	}
	if nparen != 0 {
		// probably can't happen
		return nil
	}
	if i < len(s) {
		out = append(out, s[i:])
	}
	return out
}

// join is the inverse of split.
func join(x []string) string {
	return strings.Join(x, ", ")
}

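The string-based type plumbing at the end of typecheck.go round-trips between function type strings and parameter lists. A small worked example, assuming the splitFunc and joinFunc definitions above (the concrete type string is arbitrary; expected values shown as comments):

	in, out := splitFunc("func(int, string) (bool, error)")
	// in  == []string{"int", "string"}
	// out == []string{"bool", "error"}
	s := joinFunc(in, out)
	// s == "func(int, string) (bool, error)"
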
@@ -11,7 +11,7 @@ require (
	golang.org/x/sys v0.36.0
	golang.org/x/telemetry v0.0.0-20250908211612-aef8a434d053
	golang.org/x/term v0.34.0
	golang.org/x/tools v0.37.1-0.20250915202913-9fccddc465ef
	golang.org/x/tools v0.37.1-0.20250924232827-4df13e317ce4
)

require (

@@ -22,7 +22,7 @@ golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
golang.org/x/tools v0.37.1-0.20250915202913-9fccddc465ef h1:ISPkUgvOYIt0oS7oVnwAPktCKBvgWkDlWWGMgX0veZM=
golang.org/x/tools v0.37.1-0.20250915202913-9fccddc465ef/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
golang.org/x/tools v0.37.1-0.20250924232827-4df13e317ce4 h1:IcXDtHggZZo+GzNzvVRPyNFLnOc2/Z1gg3ZVIWF2uCU=
golang.org/x/tools v0.37.1-0.20250924232827-4df13e317ce4/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef h1:mqLYrXCXYEZOop9/Dbo6RPX11539nwiCNBb1icVPmw8=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef/go.mod h1:8xcPgWmwlZONN1D9bjxtHEjrUtSEa3fakVF8iaewYKQ=

@@ -9,7 +9,6 @@ import (
	"debug/elf"
	"debug/macho"
	"debug/pe"
-	"encoding/binary"
	"flag"
	"fmt"
	"go/format"
|
||||
|
|
@@ -2131,38 +2130,6 @@ func testBuildmodePIE(t *testing.T, useCgo, setBuildmodeToPIE bool) {
		if (dc & pe.IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE) == 0 {
			t.Error("IMAGE_DLLCHARACTERISTICS_DYNAMIC_BASE flag is not set")
		}
-		if useCgo {
-			// Test that only one symbol is exported (#40795).
-			// PIE binaries don't require .edata section but unfortunately
-			// binutils doesn't generate a .reloc section unless there is
-			// at least one symbol exported.
-			// See https://sourceware.org/bugzilla/show_bug.cgi?id=19011
-			section := f.Section(".edata")
-			if section == nil {
-				t.Skip(".edata section is not present")
-			}
-			// TODO: deduplicate this struct from cmd/link/internal/ld/pe.go
-			type IMAGE_EXPORT_DIRECTORY struct {
-				_                 [2]uint32
-				_                 [2]uint16
-				_                 [2]uint32
-				NumberOfFunctions uint32
-				NumberOfNames     uint32
-				_                 [3]uint32
-			}
-			var e IMAGE_EXPORT_DIRECTORY
-			if err := binary.Read(section.Open(), binary.LittleEndian, &e); err != nil {
-				t.Fatalf("binary.Read failed: %v", err)
-			}
-
-			// Only _cgo_dummy_export should be exported
-			if e.NumberOfFunctions != 1 {
-				t.Fatalf("got %d exported functions; want 1", e.NumberOfFunctions)
-			}
-			if e.NumberOfNames != 1 {
-				t.Fatalf("got %d exported names; want 1", e.NumberOfNames)
-			}
-		}
	default:
		// testBuildmodePIE opens object files, so it needs to understand the object
		// file format.
|
||||
|
|
|
|||
|
|
@@ -195,20 +195,16 @@ var Anames = []string{
	"FLTQ",
	"FCLASSQ",
	"CLWSP",
-	"CFLWSP",
	"CLDSP",
	"CFLDSP",
	"CSWSP",
	"CSDSP",
-	"CFSWSP",
	"CFSDSP",
	"CLW",
	"CLD",
-	"CFLW",
	"CFLD",
	"CSW",
	"CSD",
-	"CFSW",
	"CFSD",
	"CJ",
	"CJR",
|
||||
|
|
|
|||
|
|
@@ -588,22 +588,18 @@ const (

	// 26.3.1: Compressed Stack-Pointer-Based Loads and Stores
	ACLWSP
-	ACFLWSP
	ACLDSP
	ACFLDSP
	ACSWSP
	ACSDSP
-	ACFSWSP
	ACFSDSP

	// 26.3.2: Compressed Register-Based Loads and Stores
	ACLW
	ACLD
-	ACFLW
	ACFLD
	ACSW
	ACSD
-	ACFSW
	ACFSD

	// 26.4: Compressed Control Transfer Instructions
|
||||
|
|
|
|||
|
|
@@ -2176,6 +2176,12 @@ var instructions = [ALAST & obj.AMask]instructionData{
	AVSOXEI32V & obj.AMask: {enc: sVIVEncoding},
	AVSOXEI64V & obj.AMask: {enc: sVIVEncoding},

+	// 31.7.7: Unit-stride Fault-Only-First Loads
+	AVLE8FFV & obj.AMask:  {enc: iVEncoding},
+	AVLE16FFV & obj.AMask: {enc: iVEncoding},
+	AVLE32FFV & obj.AMask: {enc: iVEncoding},
+	AVLE64FFV & obj.AMask: {enc: iVEncoding},
+
	// 31.7.8: Vector Load/Store Segment Instructions
	AVLSEG2E8V & obj.AMask: {enc: iVEncoding},
	AVLSEG3E8V & obj.AMask: {enc: iVEncoding},
|
||||
|
|
@@ -3839,7 +3845,7 @@ func instructionsForProg(p *obj.Prog) []*instruction {
		ins.rs1 = uint32(p.From.Offset)
	}

-	case AVLE8V, AVLE16V, AVLE32V, AVLE64V, AVSE8V, AVSE16V, AVSE32V, AVSE64V, AVLMV, AVSMV,
+	case AVLE8V, AVLE16V, AVLE32V, AVLE64V, AVSE8V, AVSE16V, AVSE32V, AVSE64V, AVLE8FFV, AVLE16FFV, AVLE32FFV, AVLE64FFV, AVLMV, AVSMV,
		AVLSEG2E8V, AVLSEG3E8V, AVLSEG4E8V, AVLSEG5E8V, AVLSEG6E8V, AVLSEG7E8V, AVLSEG8E8V,
		AVLSEG2E16V, AVLSEG3E16V, AVLSEG4E16V, AVLSEG5E16V, AVLSEG6E16V, AVLSEG7E16V, AVLSEG8E16V,
		AVLSEG2E32V, AVLSEG3E32V, AVLSEG4E32V, AVLSEG5E32V, AVLSEG6E32V, AVLSEG7E32V, AVLSEG8E32V,
|
||||
|
|
|
|||
|
|
@@ -358,3 +358,65 @@ func TestDWARFLocationList(t *testing.T) {
	}
}

func TestFlagW(t *testing.T) {
	testenv.MustHaveGoBuild(t)
	t.Parallel()

	tmpdir := t.TempDir()
	src := filepath.Join(tmpdir, "a.go")
	err := os.WriteFile(src, []byte(helloSrc), 0666)
	if err != nil {
		t.Fatal(err)
	}

	type testCase struct {
		flag      string
		wantDWARF bool
	}
	tests := []testCase{
		{"-w", false},     // -w flag disables DWARF
		{"-s", false},     // -s implies -w
		{"-s -w=0", true}, // -w=0 negates the implied -w
	}
	if testenv.HasCGO() {
		tests = append(tests,
			testCase{"-w -linkmode=external", false},
			testCase{"-s -linkmode=external", false},
			// Some external linkers don't have a way to preserve DWARF
			// without emitting the symbol table. Skip this case for now.
			// I suppose we can post-process, e.g. with objcopy.
			//testCase{"-s -w=0 -linkmode=external", true},
		)
	}

	for _, test := range tests {
		name := strings.ReplaceAll(test.flag, " ", "_")
		t.Run(name, func(t *testing.T) {
			ldflags := "-ldflags=" + test.flag
			exe := filepath.Join(t.TempDir(), "a.exe")
			cmd := testenv.Command(t, testenv.GoToolPath(t), "build", ldflags, "-o", exe, src)
			out, err := cmd.CombinedOutput()
			if err != nil {
				t.Fatalf("build failed: %v\n%s", err, out)
			}

			f, err := objfile.Open(exe)
			if err != nil {
				t.Fatal(err)
			}
			defer f.Close()

			d, err := f.DWARF()
			if test.wantDWARF {
				if err != nil {
					t.Errorf("want binary with DWARF, got error %v", err)
				}
			} else {
				if d != nil {
					t.Errorf("want binary with no DWARF, got DWARF")
				}
			}
		})
	}
}
|
||||
|
|
|
|||
|
|
@@ -208,7 +208,7 @@ func adddynrel(target *ld.Target, ldr *loader.Loader, syms *ld.ArchSyms, s loade
	}
	// The second relocation has the target symbol we want
	su.SetRelocType(rIdx+1, objabi.R_PCREL)
-	su.SetRelocAdd(rIdx+1, r.Add()+int64(r.Off())-off)
+	su.SetRelocAdd(rIdx+1, r.Add()+int64(r.Off())+int64(r.Siz())-off)
	// Remove the other relocation
	su.SetRelocSiz(rIdx, 0)
	return true
|
||||
|
|
|
|||
|
|
@@ -1451,6 +1451,8 @@ func (ctxt *Link) hostlink() {
		} else {
			argv = append(argv, "-s")
		}
+	} else if *FlagW {
+		argv = append(argv, "-Wl,-S") // suppress debugging symbols
	}

	// On darwin, whether to combine DWARF into executable.
|
||||
|
|
|
|||
|
|
@@ -487,9 +487,6 @@ func (f *peFile) addDWARFSection(name string, size int) *peSection {

// addDWARF adds DWARF information to the COFF file f.
func (f *peFile) addDWARF() {
-	if *FlagS { // disable symbol table
-		return
-	}
	if *FlagW { // disable dwarf
		return
	}
|
||||
|
|
|
|||
src/cmd/vendor/golang.org/x/tools/go/analysis/passes/copylock/copylock.go (generated, vendored)
|
|
@@ -157,7 +157,10 @@ func checkCopyLocksCallExpr(pass *analysis.Pass, ce *ast.CallExpr) {
	}
	if fun, ok := pass.TypesInfo.Uses[id].(*types.Builtin); ok {
		switch fun.Name() {
-		case "new", "len", "cap", "Sizeof", "Offsetof", "Alignof":
+		case "len", "cap", "Sizeof", "Offsetof", "Alignof":
+			// The argument of this operation is used only
+			// for its type (e.g. len(array)), or the operation
+			// does not copy a lock (e.g. len(slice)).
			return
		}
	}
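Why "new" is dropped from that list: with Go 1.26's new(expr), the argument may now be an expression that is evaluated and copied, so new no longer uses its argument only for its type. A hedged sketch of the case the analyzer should now catch (hypothetical fragment, not from this patch; assumes a sync import):

	var mu sync.Mutex
	p := new(mu)        // new(expr) copies mu, which copylock should report
	_ = new(sync.Mutex) // new(T) is still fine: no value is copied
	_ = p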
|
||||
|
|
|
|||
src/cmd/vendor/golang.org/x/tools/internal/analysisinternal/analysis.go (generated, vendored)
|
|
@@ -193,18 +193,23 @@ func CheckReadable(pass *analysis.Pass, filename string) error {
	return fmt.Errorf("Pass.ReadFile: %s is not among OtherFiles, IgnoredFiles, or names of Files", filename)
}

-// AddImport checks whether this file already imports pkgpath and
-// that import is in scope at pos. If so, it returns the name under
-// which it was imported and a zero edit. Otherwise, it adds a new
-// import of pkgpath, using a name derived from the preferred name,
-// and returns the chosen name, a prefix to be concatenated with member
-// to form a qualified name, and the edit for the new import.
+// AddImport checks whether this file already imports pkgpath and that
+// the import is in scope at pos. If so, it returns the name under
+// which it was imported and no edits. Otherwise, it adds a new import
+// of pkgpath, using a name derived from the preferred name, and
+// returns the chosen name, a prefix to be concatenated with member to
+// form a qualified name, and the edit for the new import.
//
-// In the special case that pkgpath is dot-imported then member, the
-// identifier for which the import is being added, is consulted. If
-// member is not shadowed at pos, AddImport returns (".", "", nil).
-// (AddImport accepts the caller's implicit claim that the imported
-// package declares member.)
+// The member argument indicates the name of the desired symbol within
+// the imported package. This is needed in the case when the existing
+// import is a dot import, because then it is possible that the
+// desired symbol is shadowed by other declarations in the current
+// package. If member is not shadowed at pos, AddImport returns (".",
+// "", nil). (AddImport accepts the caller's implicit claim that the
+// imported package declares member.)
+//
+// Use a preferredName of "_" to request a blank import;
+// member is ignored in this case.
//
// It does not mutate its arguments.
func AddImport(info *types.Info, file *ast.File, preferredName, pkgpath, member string, pos token.Pos) (name, prefix string, newImport []analysis.TextEdit) {
|
||||
|
|
@@ -220,6 +225,10 @@ func AddImport(info *types.Info, file *ast.File, preferredName, pkgpath, member
		pkgname := info.PkgNameOf(spec)
		if pkgname != nil && pkgname.Imported().Path() == pkgpath {
			name = pkgname.Name()
+			if preferredName == "_" {
+				// Request for blank import; any existing import will do.
+				return name, "", nil
+			}
			if name == "." {
				// The scope of ident must be the file scope.
				if s, _ := scope.LookupParent(member, pos); s == info.Scopes[file] {
|
||||
|
|
@@ -232,8 +241,12 @@ func AddImport(info *types.Info, file *ast.File, preferredName, pkgpath, member
	}

	// We must add a new import.
+
	// Ensure we have a fresh name.
-	newName := FreshName(scope, pos, preferredName)
+	newName := preferredName
+	if preferredName != "_" {
+		newName = FreshName(scope, pos, preferredName)
+	}

	// Create a new import declaration either before the first existing
	// declaration (which must exist), including its comments; or
|
||||
|
|
@@ -246,6 +259,7 @@ func AddImport(info *types.Info, file *ast.File, preferredName, pkgpath, member
	if newName != preferredName || newName != pathpkg.Base(pkgpath) {
		newText = fmt.Sprintf("%s %q", newName, pkgpath)
	}
+
	decl0 := file.Decls[0]
	var before ast.Node = decl0
	switch decl0 := decl0.(type) {
|
||||
|
|
|
|||
src/cmd/vendor/modules.txt (vendored)
|
|
@@ -73,7 +73,7 @@ golang.org/x/text/internal/tag
golang.org/x/text/language
golang.org/x/text/transform
golang.org/x/text/unicode/norm
-# golang.org/x/tools v0.37.1-0.20250915202913-9fccddc465ef
+# golang.org/x/tools v0.37.1-0.20250924232827-4df13e317ce4
## explicit; go 1.24.0
golang.org/x/tools/cmd/bisect
golang.org/x/tools/cover
|
||||
|
|
|
|||
|
|
@@ -531,11 +531,11 @@ func (s *FileSet) AddExistingFiles(files ...*File) {
//
// Removing a file that does not belong to the set has no effect.
func (s *FileSet) RemoveFile(file *File) {
-	s.last.CompareAndSwap(file, nil) // clear last file cache
-
	s.mutex.Lock()
	defer s.mutex.Unlock()

+	s.last.CompareAndSwap(file, nil) // clear last file cache
+
	pn, _ := s.tree.locate(file.key())
	if *pn != nil && (*pn).file == file {
		s.tree.delete(pn)
|||
|
|
@@ -579,3 +579,67 @@ func fsetString(fset *FileSet) string {
	buf.WriteRune('}')
	return buf.String()
}

// Test that File() does not return the already removed file, while used concurrently.
func TestRemoveFileRace(t *testing.T) {
	fset := NewFileSet()

	// Create a bunch of files.
	var files []*File
	for i := range 20000 {
		f := fset.AddFile("f", -1, (i+1)*10)
		files = append(files, f)
	}

	// governor goroutine
	race1, race2 := make(chan *File), make(chan *File)
	start := make(chan struct{})
	go func() {
		for _, f := range files {
			<-start
			race1 <- f
			race2 <- f
		}
		<-start // unlock main test goroutine
		close(race1)
		close(race2)
	}()

	go func() {
		for f := range race1 {
			fset.File(Pos(f.Base()) + 5) // populates s.last with f
		}
	}()

	start <- struct{}{}
	for f := range race2 {
		fset.RemoveFile(f)
		got := fset.File(Pos(f.Base()) + 5)
		if got != nil {
			t.Fatalf("file was not removed correctly")
		}
		start <- struct{}{}
	}
}

func TestRemovedFileFileReturnsNil(t *testing.T) {
	fset := NewFileSet()

	// Create a bunch of files.
	var files []*File
	for i := range 1000 {
		f := fset.AddFile("f", -1, (i+1)*100)
		files = append(files, f)
	}

	rand.Shuffle(len(files), func(i, j int) {
		files[i], files[j] = files[j], files[i]
	})

	for _, f := range files {
		fset.RemoveFile(f)
		if got := fset.File(Pos(f.Base()) + 10); got != nil {
			t.Fatalf("file was not removed correctly; got file with base: %v", got.Base())
		}
	}
}
|
|||
|
|
@ -101,17 +101,17 @@ func (check *Checker) builtin(x *operand, call *ast.CallExpr, id builtinId) (_ b
|
|||
if ok, _ := x.assignableTo(check, NewSlice(universeByte), nil); ok {
|
||||
y := args[1]
|
||||
hasString := false
|
||||
typeset(y.typ, func(_, u Type) bool {
|
||||
for _, u := range typeset(y.typ) {
|
||||
if s, _ := u.(*Slice); s != nil && Identical(s.elem, universeByte) {
|
||||
return true
|
||||
}
|
||||
if isString(u) {
|
||||
// typeset ⊇ {[]byte}
|
||||
} else if isString(u) {
|
||||
// typeset ⊇ {string}
|
||||
hasString = true
|
||||
return true
|
||||
} else {
|
||||
y = nil
|
||||
break
|
||||
}
|
||||
y = nil
|
||||
return false
|
||||
})
|
||||
}
|
||||
if y != nil && hasString {
|
||||
// setting the signature also signals that we're done
|
||||
sig = makeSig(x.typ, x.typ, y.typ)
|
||||
|
|
@ -371,16 +371,16 @@ func (check *Checker) builtin(x *operand, call *ast.CallExpr, id builtinId) (_ b
|
|||
var special bool
|
||||
if ok, _ := x.assignableTo(check, NewSlice(universeByte), nil); ok {
|
||||
special = true
|
||||
typeset(y.typ, func(_, u Type) bool {
|
||||
for _, u := range typeset(y.typ) {
|
||||
if s, _ := u.(*Slice); s != nil && Identical(s.elem, universeByte) {
|
||||
return true
|
||||
// typeset ⊇ {[]byte}
|
||||
} else if isString(u) {
|
||||
// typeset ⊇ {string}
|
||||
} else {
|
||||
special = false
|
||||
break
|
||||
}
|
||||
if isString(u) {
|
||||
return true
|
||||
}
|
||||
special = false
|
||||
return false
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// general case
|
||||
|
|
@@ -639,11 +639,30 @@ func (check *Checker) builtin(x *operand, call *ast.CallExpr, id builtinId) (_ b
	}

	case _New:
-		// new(T)
+		// new(T) or new(expr)
		// (no argument evaluated yet)
-		T := check.varType(argList[0])
-		if !isValid(T) {
-			return
+		arg := argList[0]
+		check.exprOrType(x, arg, true)
+		var T Type
+		switch x.mode {
+		case builtin:
+			check.errorf(x, UncalledBuiltin, "%s must be called", x)
+			x.mode = invalid
+		case typexpr:
+			// new(T)
+			T = x.typ
+			if !isValid(T) {
+				return
+			}
+		default:
+			// new(expr)
+			check.verifyVersionf(call.Fun, go1_26, "new(expr)")
+			T = Default(x.typ)
+			if T != x.typ {
+				// untyped constant: check for overflow.
+				check.assignment(x, T, "argument to new")
+			}
+			check.validVarType(arg, T)
		}

		x.mode = value
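For readers unfamiliar with the extension being type-checked here: new(expr) allocates a variable initialized to the value of expr and returns its address, with untyped constants taking their default type, as the Default(x.typ) call above implements. A minimal sketch of the go1.26 surface behavior (illustrative, not from this patch):

	p := new(42)         // p has type *int; *p == 42 (untyped constant gets its default type)
	s := new("hi" + "!") // s has type *string; *s == "hi!"
	n := new(int)        // the old new(T) form still works: *n is 0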
|
||||
|
|
@@ -964,29 +983,22 @@ func (check *Checker) builtin(x *operand, call *ast.CallExpr, id builtinId) (_ b
// or a type error if x is not a slice (or a type set of slices).
func sliceElem(x *operand) (Type, *typeError) {
	var E Type
-	var err *typeError
-	typeset(x.typ, func(_, u Type) bool {
+	for _, u := range typeset(x.typ) {
		s, _ := u.(*Slice)
		if s == nil {
			if x.isNil() {
				// Printing x in this case would just print "nil".
				// Special case this so we can emphasize "untyped".
-				err = typeErrorf("argument must be a slice; have untyped nil")
+				return nil, typeErrorf("argument must be a slice; have untyped nil")
			} else {
-				err = typeErrorf("argument must be a slice; have %s", x)
+				return nil, typeErrorf("argument must be a slice; have %s", x)
			}
-			return false
		}
		if E == nil {
			E = s.elem
		} else if !Identical(E, s.elem) {
-			err = typeErrorf("mismatched slice element types %s and %s in %s", E, s.elem, x)
-			return false
+			return nil, typeErrorf("mismatched slice element types %s and %s in %s", E, s.elem, x)
		}
-		return true
-	})
-	if err != nil {
-		return nil, err
	}
	return E, nil
}
|
|
|
|||
|
|
@ -218,7 +218,8 @@ func (check *Checker) sliceExpr(x *operand, e *ast.SliceExpr) {
|
|||
// determine common underlying type cu
|
||||
var ct, cu Type // type and respective common underlying type
|
||||
var hasString bool
|
||||
typeset(x.typ, func(t, u Type) bool {
|
||||
// TODO(adonovan): use go1.23 "range typeset()".
|
||||
typeset(x.typ)(func(t, u Type) bool {
|
||||
if u == nil {
|
||||
check.errorf(x, NonSliceableOperand, "cannot slice %s: no specific type in %s", x, x.typ)
|
||||
cu = nil
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ func NewSignatureType(recv *Var, recvTypeParams, typeParams []*TypeParam, params
|
|||
}
|
||||
last := params.At(n - 1).typ
|
||||
var S *Slice
|
||||
typeset(last, func(t, _ Type) bool {
|
||||
for t := range typeset(last) {
|
||||
var s *Slice
|
||||
if isString(t) {
|
||||
s = NewSlice(universeByte)
|
||||
|
|
@ -73,10 +73,9 @@ func NewSignatureType(recv *Var, recvTypeParams, typeParams []*TypeParam, params
|
|||
S = s
|
||||
} else if !Identical(S, s) {
|
||||
S = nil
|
||||
return false
|
||||
break
|
||||
}
|
||||
return true
|
||||
})
|
||||
}
|
||||
if S == nil {
|
||||
panic(fmt.Sprintf("got %s, want variadic parameter of unnamed slice or string type", last))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -362,6 +362,7 @@ func TestStdKen(t *testing.T) {
|
|||
var excluded = map[string]bool{
|
||||
"builtin": true,
|
||||
"cmd/compile/internal/ssa/_gen": true,
|
||||
"runtime/_mkmalloc": true,
|
||||
"simd/_gen/simdgen": true,
|
||||
"simd/_gen/unify": true,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -158,10 +158,10 @@ func (t *TypeParam) is(f func(*term) bool) bool {
|
|||
return t.iface().typeSet().is(f)
|
||||
}
|
||||
|
||||
// typeset is an iterator over the (type/underlying type) pairs of the
|
||||
// typeset reports whether f(t, y) is true for all (type/underlying type) pairs of the
|
||||
// specific type terms of t's constraint.
|
||||
// If there are no specific terms, typeset calls yield with (nil, nil).
|
||||
// In any case, typeset is guaranteed to call yield at least once.
|
||||
func (t *TypeParam) typeset(yield func(t, u Type) bool) {
|
||||
t.iface().typeSet().typeset(yield)
|
||||
// If there are no specific terms, typeset returns f(nil, nil).
|
||||
// In any case, typeset is guaranteed to call f at least once.
|
||||
func (t *TypeParam) typeset(f func(t, u Type) bool) bool {
|
||||
return t.iface().typeSet().all(f)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -107,13 +107,12 @@ func (s *_TypeSet) hasTerms() bool { return !s.terms.isEmpty() && !s.terms.isAll
|
|||
// subsetOf reports whether s1 ⊆ s2.
|
||||
func (s1 *_TypeSet) subsetOf(s2 *_TypeSet) bool { return s1.terms.subsetOf(s2.terms) }
|
||||
|
||||
// typeset is an iterator over the (type/underlying type) pairs in s.
|
||||
// If s has no specific terms, typeset calls yield with (nil, nil).
|
||||
// In any case, typeset is guaranteed to call yield at least once.
|
||||
func (s *_TypeSet) typeset(yield func(t, u Type) bool) {
|
||||
// all reports whether f(t, u) is true for each (type/underlying type) pairs in s.
|
||||
// If s has no specific terms, all calls f(nil, nil).
|
||||
// In any case, all is guaranteed to call f at least once.
|
||||
func (s *_TypeSet) all(f func(t, u Type) bool) bool {
|
||||
if !s.hasTerms() {
|
||||
yield(nil, nil)
|
||||
return
|
||||
return f(nil, nil)
|
||||
}
|
||||
|
||||
for _, t := range s.terms {
|
||||
|
|
@ -126,10 +125,11 @@ func (s *_TypeSet) typeset(yield func(t, u Type) bool) {
|
|||
if debug {
|
||||
assert(Identical(u, under(u)))
|
||||
}
|
||||
if !yield(t.typ, u) {
|
||||
break
|
||||
if !f(t.typ, u) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// is calls f with the specific type terms of s and reports whether
|
||||
|
|
|
|||
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
package types
|
||||
|
||||
import "iter"
|
||||
|
||||
// under returns the true expanded underlying type.
|
||||
// If it doesn't exist, the result is Typ[Invalid].
|
||||
// under must only be called when a type is known
|
||||
|
|
@ -21,12 +23,18 @@ func under(t Type) Type {
|
|||
// If typ is a type parameter, underIs returns the result of typ.underIs(f).
|
||||
// Otherwise, underIs returns the result of f(under(typ)).
|
||||
func underIs(typ Type, f func(Type) bool) bool {
|
||||
var ok bool
|
||||
typeset(typ, func(_, u Type) bool {
|
||||
ok = f(u)
|
||||
return ok
|
||||
return all(typ, func(_, u Type) bool {
|
||||
return f(u)
|
||||
})
|
||||
return ok
|
||||
}
|
||||
|
||||
// all reports whether f(t, u) is true for all (type/underlying type)
|
||||
// pairs in the typeset of t. See [typeset] for details of sequence.
|
||||
func all(t Type, f func(t, u Type) bool) bool {
|
||||
if p, _ := Unalias(t).(*TypeParam); p != nil {
|
||||
return p.typeset(f)
|
||||
}
|
||||
return f(t, under(t))
|
||||
}
|
||||
|
||||
// typeset is an iterator over the (type/underlying type) pairs of the
|
||||
|
|
@ -35,12 +43,10 @@ func underIs(typ Type, f func(Type) bool) bool {
|
|||
// In that case, if there are no specific terms, typeset calls yield with (nil, nil).
|
||||
// If t is not a type parameter, the implied type set consists of just t.
|
||||
// In any case, typeset is guaranteed to call yield at least once.
|
||||
func typeset(t Type, yield func(t, u Type) bool) {
|
||||
if p, _ := Unalias(t).(*TypeParam); p != nil {
|
||||
p.typeset(yield)
|
||||
return
|
||||
func typeset(t Type) iter.Seq2[Type, Type] {
|
||||
return func(yield func(t, u Type) bool) {
|
||||
_ = all(t, yield)
|
||||
}
|
||||
yield(t, under(t))
|
||||
}
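The practical effect of making typeset return an iterator: call sites can use go1.23 range-over-func instead of threading state through a callback, with break replacing "return false". A sketch of the new call pattern (illustrative, using the go/types-internal names defined above):

	for t, u := range typeset(x.typ) {
		// t is the type term, u its underlying type; u is nil
		// when the constraint has no specific terms.
		if _, ok := u.(*Slice); !ok {
			break // stops the iteration, like returning false from the old callback
		}
		_ = t
	}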
|
||||
|
||||
// A typeError describes a type error.
|
||||
|
|
@ -83,35 +89,28 @@ func (err *typeError) format(check *Checker) string {
|
|||
// with the single type t in its type set.
|
||||
func commonUnder(t Type, cond func(t, u Type) *typeError) (Type, *typeError) {
|
||||
var ct, cu Type // type and respective common underlying type
|
||||
var err *typeError
|
||||
|
||||
bad := func(format string, args ...any) bool {
|
||||
err = typeErrorf(format, args...)
|
||||
return false
|
||||
}
|
||||
|
||||
typeset(t, func(t, u Type) bool {
|
||||
for t, u := range typeset(t) {
|
||||
if cond != nil {
|
||||
if err = cond(t, u); err != nil {
|
||||
return false
|
||||
if err := cond(t, u); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if u == nil {
|
||||
return bad("no specific type")
|
||||
return nil, typeErrorf("no specific type")
|
||||
}
|
||||
|
||||
// If this is the first type we're seeing, we're done.
|
||||
if cu == nil {
|
||||
ct, cu = t, u
|
||||
return true
|
||||
continue
|
||||
}
|
||||
|
||||
// If we've seen a channel before, and we have a channel now, they must be compatible.
|
||||
if chu, _ := cu.(*Chan); chu != nil {
|
||||
if ch, _ := u.(*Chan); ch != nil {
|
||||
if !Identical(chu.elem, ch.elem) {
|
||||
return bad("channels %s and %s have different element types", ct, t)
|
||||
return nil, typeErrorf("channels %s and %s have different element types", ct, t)
|
||||
}
|
||||
// If we have different channel directions, keep the restricted one
|
||||
// and complain if they conflict.
|
||||
|
|
@ -121,22 +120,16 @@ func commonUnder(t Type, cond func(t, u Type) *typeError) (Type, *typeError) {
|
|||
case chu.dir == SendRecv:
|
||||
ct, cu = t, u // switch to restricted channel
|
||||
case ch.dir != SendRecv:
|
||||
return bad("channels %s and %s have conflicting directions", ct, t)
|
||||
return nil, typeErrorf("channels %s and %s have conflicting directions", ct, t)
|
||||
}
|
||||
return true
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, the current type must have the same underlying type as all previous types.
|
||||
if !Identical(cu, u) {
|
||||
return bad("%s and %s have different underlying types", ct, t)
|
||||
return nil, typeErrorf("%s and %s have different underlying types", ct, t)
|
||||
}
|
||||
|
||||
return true
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return cu, nil
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ var (
|
|||
go1_21 = asGoVersion("go1.21")
|
||||
go1_22 = asGoVersion("go1.22")
|
||||
go1_23 = asGoVersion("go1.23")
|
||||
go1_26 = asGoVersion("go1.26")
|
||||
|
||||
// current (deployed) Go version
|
||||
go_current = asGoVersion(fmt.Sprintf("go1.%d", goversion.Version))
|
||||
|
|
|
|||
|
|
@ -70,22 +70,22 @@ func Example() {
|
|||
}
|
||||
// Output:
|
||||
// bin red green blue alpha
|
||||
// 0x0000-0x0fff: 364 790 7242 0
|
||||
// 0x1000-0x1fff: 645 2967 1039 0
|
||||
// 0x2000-0x2fff: 1072 2299 979 0
|
||||
// 0x3000-0x3fff: 820 2266 980 0
|
||||
// 0x4000-0x4fff: 537 1305 541 0
|
||||
// 0x5000-0x5fff: 319 962 261 0
|
||||
// 0x6000-0x6fff: 322 375 177 0
|
||||
// 0x7000-0x7fff: 601 279 214 0
|
||||
// 0x8000-0x8fff: 3478 227 273 0
|
||||
// 0x9000-0x9fff: 2260 234 329 0
|
||||
// 0xa000-0xafff: 921 282 373 0
|
||||
// 0xb000-0xbfff: 321 335 397 0
|
||||
// 0xc000-0xcfff: 229 388 298 0
|
||||
// 0xd000-0xdfff: 260 414 277 0
|
||||
// 0xe000-0xefff: 516 428 298 0
|
||||
// 0xf000-0xffff: 2785 1899 1772 15450
|
||||
// 0x0000-0x0fff: 362 793 7245 0
|
||||
// 0x1000-0x1fff: 648 2963 1036 0
|
||||
// 0x2000-0x2fff: 1072 2301 977 0
|
||||
// 0x3000-0x3fff: 819 2266 982 0
|
||||
// 0x4000-0x4fff: 537 1303 541 0
|
||||
// 0x5000-0x5fff: 321 964 261 0
|
||||
// 0x6000-0x6fff: 321 375 177 0
|
||||
// 0x7000-0x7fff: 599 278 213 0
|
||||
// 0x8000-0x8fff: 3478 228 275 0
|
||||
// 0x9000-0x9fff: 2260 233 328 0
|
||||
// 0xa000-0xafff: 921 282 374 0
|
||||
// 0xb000-0xbfff: 322 335 395 0
|
||||
// 0xc000-0xcfff: 228 388 299 0
|
||||
// 0xd000-0xdfff: 261 415 277 0
|
||||
// 0xe000-0xefff: 516 423 297 0
|
||||
// 0xf000-0xffff: 2785 1903 1773 15450
|
||||
}
|
||||
|
||||
const data = `
|
||||
|
|
|
|||
src/image/jpeg/dct.go (new file, 521 lines)
|
|
@ -0,0 +1,521 @@
|
|||
// Copyright 2025 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package jpeg
|
||||
|
||||
// Discrete Cosine Transformation (DCT) implementations using the algorithm from
|
||||
// Christoph Loeffler, Adriaan Ligtenberg, and George S. Moschytz,
|
||||
// “Practical Fast 1-D DCT Algorithms with 11 Multiplications,” ICASSP 1989.
|
||||
// https://ieeexplore.ieee.org/document/266596
|
||||
//
|
||||
// Since the paper is paywalled, the rest of this comment gives a summary.
|
||||
//
|
||||
// A 1-dimensional forward DCT (1D FDCT) takes as input 8 values x0..x7
|
||||
// and transforms them in place into the result values.
|
||||
//
|
||||
// The mathematical definition of the N-point 1D FDCT is:
|
||||
//
|
||||
// X[k] = α_k Σ_n x[n] * cos (2n+1)*k*π/2N
|
||||
//
|
||||
// where α₀ = √2 and α_k = 1 for k > 0.
|
||||
//
|
||||
// For our purposes, N=8, so the angles end up being multiples of π/16.
|
||||
// The most direct implementation of this definition would require 64 multiplications.
|
||||
//
|
||||
// Loeffler's paper presents a more efficient computation that requires only
|
||||
// 11 multiplications and works in terms of three basic operations:
|
||||
//
|
||||
// - A “butterfly” x0, x1 = x0+x1, x0-x1.
|
||||
// The inverse is x0, x1 = (x0+x1)/2, (x0-x1)/2.
|
||||
//
|
||||
// - A scaling of x0 by k: x0 *= k. The inverse is scaling by 1/k.
|
||||
//
|
||||
// - A rotation of x0, x1 by θ, defined as:
|
||||
// x0, x1 = x0 cos θ + x1 sin θ, -x0 sin θ + x1 cos θ.
|
||||
// The inverse is rotation by -θ.
|
||||
//
|
||||
// The algorithm proceeds in four stages:
|
||||
//
|
||||
// Stage 1:
|
||||
// - butterfly x0, x7; x1, x6; x2, x5; x3, x4.
|
||||
//
|
||||
// Stage 2:
|
||||
// - butterfly x0, x3; x1, x2
|
||||
// - rotate x4, x7 by 3π/16
|
||||
// - rotate x5, x6 by π/16.
|
||||
//
|
||||
// Stage 3:
|
||||
// - butterfly x0, x1; x4, x6; x7, x5
|
||||
// - rotate x2, x3 by 6π/16 and scale by √2.
|
||||
//
|
||||
// Stage 4:
|
||||
// - butterfly x7, x4
|
||||
// - scale x5, x6 by √2.
|
||||
//
|
||||
// Finally, the values are permuted. The permutation can be read as either:
|
||||
// - x0, x4, x2, x6, x7, x3, x5, x1 = x0, x1, x2, x3, x4, x5, x6, x7 (paper's form)
|
||||
// - x0, x1, x2, x3, x4, x5, x6, x7 = x0, x7, x2, x5, x1, x6, x3, x4 (sorted by LHS)
|
||||
// The code below uses the second form to make it easier to merge adjacent stores.
|
||||
// (Note that unlike in recursive FFT implementations, the permutation here is
|
||||
// not always mapping indexes to their bit reversals.)
|
||||
//
|
||||
// As written above, the rotation requires four multiplications, but it can be
|
||||
// reduced to three by refactoring (see [dctBox] below), and the scaling in
|
||||
// stage 3 can be merged into the rotation constants, so the overall cost
|
||||
// of a 1D FDCT is 11 multiplies.
|
||||
//
|
||||
// The 1D inverse DCT (IDCT) is the 1D FDCT run backward
|
||||
// with all the basic operations inverted.
|
||||
|
||||
// dctBox implements a 3-multiply, 3-add rotation+scaling.
|
||||
// Given x0, x1, k*cos θ, and k*sin θ, dctBox returns the
|
||||
// rotated and scaled coordinates.
|
||||
// (It is called dctBox because the rotate+scale operation
|
||||
// is drawn as a box in Figures 1 and 2 in the paper.)
|
||||
func dctBox(x0, x1, kcos, ksin int32) (y0, y1 int32) {
|
||||
// y0 = x0*kcos + x1*ksin
|
||||
// y1 = -x0*ksin + x1*kcos
|
||||
ksum := kcos * (x0 + x1)
|
||||
y0 = ksum + (ksin-kcos)*x1
|
||||
y1 = ksum - (kcos+ksin)*x0
|
||||
return y0, y1
|
||||
}
|
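The 3-multiply form works because the rotation is refactored around the shared product kcos*(x0+x1); expanding the two result expressions confirms they match the 4-multiply definition of the rotation+scale (a worked check, not part of the file):

	// y0 = ksum + (ksin-kcos)*x1 = kcos*x0 + kcos*x1 + ksin*x1 - kcos*x1 = x0*kcos + x1*ksin
	// y1 = ksum - (kcos+ksin)*x0 = kcos*x0 + kcos*x1 - kcos*x0 - ksin*x0 = -x0*ksin + x1*kcos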
||||
|
||||
// A block is an 8x8 input to a 2D DCT (either the FDCT or IDCT).
|
||||
// The input is actually only 8x8 uint8 values, and the outputs are 8x8 int16,
|
||||
// but it is convenient to use int32s for intermediate storage,
|
||||
// so we define only a single block type of [8*8]int32.
|
||||
//
|
||||
// A 2D DCT is implemented as 1D DCTs over the rows and columns.
|
||||
//
|
||||
// dct_test.go defines a String method for nice printing in tests.
|
||||
type block [blockSize]int32
|
||||
|
||||
const blockSize = 8 * 8
|
||||
|
||||
// Note on Numerical Precision
|
||||
//
|
||||
// The inputs to both the FDCT and IDCT are uint8 values stored in a block,
|
||||
// and the outputs are int16s in the same block, but the overall operation
|
||||
// uses int32 values as fixed-point intermediate values.
|
||||
// In the code comments below, the notation “QN.M” refers to a
|
||||
// signed value of 1+N+M significant bits, one of which is the sign bit,
|
||||
// and M of which hold fractional (sub-integer) precision.
|
||||
// For example, 255 as a Q8.0 value is stored as int32(255),
|
||||
// while 255 as a Q8.1 value is stored as int32(510),
|
||||
// and 255.5 as a Q8.1 value is int32(511).
|
||||
// The notation UQN.M refers to an unsigned value of N+M significant bits.
|
||||
// See https://en.wikipedia.org/wiki/Q_(number_format) for more.
|
||||
//
|
||||
// In general we only need to keep about 16 significant bits, but it is more
|
||||
// efficient and somewhat more precise to let unnecessary fractional bits
|
||||
// accumulate and shift them away in bulk rather than after every operation.
|
||||
// As such, it is important to keep track of the number of fractional bits
|
||||
// in each variable at different points in the code, to avoid mistakes like
|
||||
// adding numbers with different fractional precisions, as well as to keep
|
||||
// track of the total number of bits, to avoid overflow. A comment like:
|
||||
//
|
||||
// // x[123] now Q8.2.
|
||||
//
|
||||
// means that x1, x2, and x3 are all Q8.2 (11-bit) values.
|
||||
// Keeping extra precision bits also reduces the size of the errors introduced
|
||||
// by using right shift to approximate rounded division.
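A quick worked example of the QN.M bookkeeping described above (illustrative, not part of the file): adding two Q8.2 values yields a Q9.2 value, and fraction bits are dropped with a rounding add followed by a right shift.

	a := int32(255 << 2)      // 255 as Q8.2, stored as 1020
	b := int32(510)           // 127.5 as Q8.2
	sum := a + b              // Q9.2: 382.5 stored as 1530
	back := (sum + 1<<1) >> 2 // round and drop the 2 fraction bits: 383 as Q9.0
	_ = back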
|
||||
|
||||
// Constants needed for the implementation.
|
||||
// These are all 60-bit precision fixed-point constants.
|
||||
// The function c(val, b) rounds the constant to b bits.
|
||||
// c is simple enough that calls to it with constant args
|
||||
// are inlined and constant-propagated down to an inline constant.
|
||||
// Each constant is commented with its Ivy definition (see robpike.io/ivy),
|
||||
// using this scaling helper function:
|
||||
//
|
||||
// op fix x = floor 0.5 + x * 2**60
|
||||
const (
|
||||
cos1 = 1130768441178740757 // fix cos 1*pi/16
|
||||
sin1 = 224923827593068887 // fix sin 1*pi/16
|
||||
cos3 = 958619196450722178 // fix cos 3*pi/16
|
||||
sin3 = 640528868967736374 // fix sin 3*pi/16
|
||||
sqrt2 = 1630477228166597777 // fix sqrt 2
|
||||
sqrt2_cos6 = 623956622067911264 // fix (sqrt 2)*cos 6*pi/16
|
||||
sqrt2_sin6 = 1506364539328854985 // fix (sqrt 2)*sin 6*pi/16
|
||||
sqrt2inv = 815238614083298888 // fix 1/sqrt 2
|
||||
sqrt2inv_cos6 = 311978311033955632 // fix (1/sqrt 2)*cos 6*pi/16
|
||||
sqrt2inv_sin6 = 753182269664427492 // fix (1/sqrt 2)*sin 6*pi/16
|
||||
)
|
||||
|
||||
func c(x uint64, bits int) int32 {
|
||||
return int32((x + (1 << (59 - bits))) >> (60 - bits))
|
||||
}
|
||||
|
||||
// fdct implements the forward DCT.
|
||||
// Inputs are UQ8.0; outputs are Q13.0.
|
||||
func fdct(b *block) {
|
||||
fdctCols(b)
|
||||
fdctRows(b)
|
||||
}
|
||||
|
||||
// fdctCols applies the 1D DCT to the columns of b.
|
||||
// Inputs are UQ8.0 in [0,255] but interpreted as [-128,127].
|
||||
// Outputs are Q10.18.
|
||||
func fdctCols(b *block) {
|
||||
for i := range 8 {
|
||||
x0 := b[0*8+i]
|
||||
x1 := b[1*8+i]
|
||||
x2 := b[2*8+i]
|
||||
x3 := b[3*8+i]
|
||||
x4 := b[4*8+i]
|
||||
x5 := b[5*8+i]
|
||||
x6 := b[6*8+i]
|
||||
x7 := b[7*8+i]
|
||||
|
||||
// x[01234567] are UQ8.0 in [0,255].
|
||||
|
||||
// Stage 1: four butterflies.
|
||||
// In general a butterfly of QN.M inputs produces Q(N+1).M outputs.
|
||||
// A butterfly of UQN.M inputs produces a UQ(N+1).M sum and a QN.M difference.
|
||||
|
||||
x0, x7 = x0+x7, x0-x7
|
||||
x1, x6 = x1+x6, x1-x6
|
||||
x2, x5 = x2+x5, x2-x5
|
||||
x3, x4 = x3+x4, x3-x4
|
||||
// x[0123] now UQ9.0 in [0, 510].
|
||||
// x[4567] now Q8.0 in [-255,255].
|
||||
|
||||
// Stage 2: two boxes and two butterflies.
|
||||
// A box on QN.M inputs with B-bit constants
|
||||
// produces Q(N+1).(M+B) outputs.
|
||||
// (The +1 is from the addition.)
|
||||
|
||||
x4, x7 = dctBox(x4, x7, c(cos3, 18), c(sin3, 18))
|
||||
x5, x6 = dctBox(x5, x6, c(cos1, 18), c(sin1, 18))
|
||||
// x[47] now Q9.18 in [-354, 354].
|
||||
// x[56] now Q9.18 in [-300, 300].
|
||||
|
||||
x0, x3 = x0+x3, x0-x3
|
||||
x1, x2 = x1+x2, x1-x2
|
||||
// x[01] now UQ10.0 in [0, 1020].
|
||||
// x[23] now Q9.0 in [-510, 510].
|
||||
|
||||
// Stage 3: one box and three butterflies.
|
||||
|
||||
x2, x3 = dctBox(x2, x3, c(sqrt2_cos6, 18), c(sqrt2_sin6, 18))
|
||||
// x[23] now Q10.18 in [-943, 943].
|
||||
|
||||
x0, x1 = x0+x1, x0-x1
|
||||
// x0 now UQ11.0 in [0, 2040].
|
||||
// x1 now Q10.0 in [-1020, 1020].
|
||||
|
||||
// Store x0, x1, x2, x3 to their permuted targets.
|
||||
// The original +128 in every input value
|
||||
// has cancelled out except in the “DC signal” x0.
|
||||
// Subtracting 128*8 here is equivalent to subtracting 128
|
||||
// from every input before we started, but cheaper.
|
||||
// It also converts x0 from UQ11.18 to Q10.18.
|
||||
b[0*8+i] = (x0 - 128*8) << 18
|
||||
b[4*8+i] = x1 << 18
|
||||
b[2*8+i] = x2
|
||||
b[6*8+i] = x3
|
||||
|
||||
x4, x6 = x4+x6, x4-x6
|
||||
x7, x5 = x7+x5, x7-x5
|
||||
// x[4567] now Q10.18 in [-654, 654].
|
||||
|
||||
// Stage 4: two √2 scalings and one butterfly.
|
||||
|
||||
x5 = (x5 >> 12) * c(sqrt2, 12)
|
||||
x6 = (x6 >> 12) * c(sqrt2, 12)
|
||||
// x[56] still Q10.18 in [-925, 925] (= 654√2).
|
||||
x7, x4 = x7+x4, x7-x4
|
||||
// x[47] still Q10.18 in [-925, 925] (not Q11.18!).
|
||||
// This is not obvious at all! See “Note on 925” below.
|
||||
|
||||
// Store x4 x5 x6 x7 to their permuted targets.
|
||||
b[1*8+i] = x7
|
||||
b[3*8+i] = x5
|
||||
b[5*8+i] = x6
|
||||
b[7*8+i] = x4
|
||||
}
|
||||
}
|
||||
|
||||
// fdctRows applies the 1D DCT to the rows of b.
|
||||
// Inputs are Q10.18; outputs are Q13.0.
|
||||
func fdctRows(b *block) {
|
||||
for i := range 8 {
|
||||
x := b[8*i : 8*i+8 : 8*i+8]
|
||||
x0 := x[0]
|
||||
x1 := x[1]
|
||||
x2 := x[2]
|
||||
x3 := x[3]
|
||||
x4 := x[4]
|
||||
x5 := x[5]
|
||||
x6 := x[6]
|
||||
x7 := x[7]
|
||||
|
||||
// x[01234567] are Q10.18 [-1020, 1020].
|
||||
|
||||
// Stage 1: four butterflies.
|
||||
|
||||
x0, x7 = x0+x7, x0-x7
|
||||
x1, x6 = x1+x6, x1-x6
|
||||
x2, x5 = x2+x5, x2-x5
|
||||
x3, x4 = x3+x4, x3-x4
|
||||
// x[01234567] now Q11.18 in [-2040, 2040].
|
||||
|
||||
// Stage 2: two boxes and two butterflies.
|
||||
|
||||
x4, x7 = dctBox(x4>>14, x7>>14, c(cos3, 14), c(sin3, 14))
|
||||
x5, x6 = dctBox(x5>>14, x6>>14, c(cos1, 14), c(sin1, 14))
|
||||
// x[47] now Q12.18 in [-2830, 2830].
|
||||
// x[56] now Q12.18 in [-2400, 2400].
|
||||
x0, x3 = x0+x3, x0-x3
|
||||
x1, x2 = x1+x2, x1-x2
|
||||
// x[01234567] now Q12.18 in [-4080, 4080].
|
||||
|
||||
// Stage 3: one box and three butterflies.
|
||||
|
||||
x2, x3 = dctBox(x2>>14, x3>>14, c(sqrt2_cos6, 14), c(sqrt2_sin6, 14))
|
||||
// x[23] now Q13.18 in [-7539, 7539].
|
||||
x0, x1 = x0+x1, x0-x1
|
||||
// x[01] now Q13.18 in [-8160, 8160].
|
||||
x4, x6 = x4+x6, x4-x6
|
||||
x7, x5 = x7+x5, x7-x5
|
||||
// x[4567] now Q13.18 in [-5230, 5230].
|
||||
|
||||
// Stage 4: two √2 scalings and one butterfly.
|
||||
|
||||
x5 = (x5 >> 14) * c(sqrt2, 14)
|
||||
x6 = (x6 >> 14) * c(sqrt2, 14)
|
||||
// x[56] still Q13.18 in [-7397, 7397] (= 5230√2).
|
||||
x7, x4 = x7+x4, x7-x4
|
||||
// x[47] still Q13.18 in [-7395, 7395] (= 2040*3.6246).
|
||||
// See “Note on 925” below.
|
||||
|
||||
// Cut from Q13.18 to Q13.0.
|
||||
x0 = (x0 + 1<<17) >> 18
|
||||
x1 = (x1 + 1<<17) >> 18
|
||||
x2 = (x2 + 1<<17) >> 18
|
||||
x3 = (x3 + 1<<17) >> 18
|
||||
x4 = (x4 + 1<<17) >> 18
|
||||
x5 = (x5 + 1<<17) >> 18
|
||||
x6 = (x6 + 1<<17) >> 18
|
||||
x7 = (x7 + 1<<17) >> 18
|
||||
|
||||
// Note: Unlike in fdctCols, all the stores are saved for the end
|
||||
// because they are adjacent memory locations and some systems
|
||||
// can use multiword stores.
|
||||
x[0] = x0
|
||||
x[1] = x7
|
||||
x[2] = x2
|
||||
x[3] = x5
|
||||
x[4] = x1
|
||||
x[5] = x6
|
||||
x[6] = x3
|
||||
x[7] = x4
|
||||
}
|
||||
}
|
||||
|
||||
// “Note on 925”, deferred from above to avoid interrupting code.
|
||||
//
|
||||
// In fdctCols, heading into stage 2, the values x4, x5, x6, x7 are in [-255, 255].
|
||||
// Let's call those specific values b4, b5, b6, b7, and trace how x[4567] evolve:
|
||||
//
|
||||
// Stage 2:
|
||||
// x4 = b4*cos3 + b7*sin3
|
||||
// x7 = -b4*sin3 + b7*cos3
|
||||
// x5 = b5*cos1 + b6*sin1
|
||||
// x6 = -b5*sin1 + b6*cos1
|
||||
//
|
||||
// Stage 3:
|
||||
//
|
||||
// x4 = x4+x6 = b4*cos3 + b7*sin3 - b5*sin1 + b6*cos1
|
||||
// x6 = x4-x6 = b4*cos3 + b7*sin3 + b5*sin1 - b6*cos1
|
||||
// x7 = x7+x5 = -b4*sin3 + b7*cos3 + b5*cos1 + b6*sin1
|
||||
// x5 = x7-x5 = -b4*sin3 + b7*cos3 - b5*cos1 - b6*sin1
|
||||
//
|
||||
// Stage 4:
|
||||
//
|
||||
// x7 = x7+x4 = -b4*sin3 + b7*cos3 + b5*cos1 + b6*sin1 + b4*cos3 + b7*sin3 - b5*sin1 + b6*cos1
|
||||
// = b4*(cos3-sin3) + b5*(cos1-sin1) + b6*(cos1+sin1) + b7*(cos3+sin3)
|
||||
// < 255*(0.2759 + 0.7857 + 1.1759 + 1.3871) = 255*3.6246 < 925.
|
||||
//
|
||||
// x4 = x7-x4 = -b4*sin3 + b7*cos3 + b5*cos1 + b6*sin1 - b4*cos3 - b7*sin3 + b5*sin1 - b6*cos1
|
||||
// = -b4*(cos3+sin3) + b5*(cos1+sin1) + b6*(sin1-cos1) + b7*(cos3-sin3)
|
||||
// < same 925.
|
||||
//
|
||||
// The fact that x5, x6 are also at most 925 is not a coincidence: we are computing
|
||||
// the same kinds of numbers for all four, just with different paths to them.
|
||||
//
|
||||
// In fdctRows, the same analysis applies, but the initial values are
|
||||
// in [-2040, 2040] instead of [-255, 255], so the bound is 2040*3.6246 < 7395.
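As a sanity check on those coefficient bounds (my arithmetic, not part of the file): cos3-sin3 = 0.8315-0.5556 = 0.2759, cos1-sin1 = 0.9808-0.1951 = 0.7857, cos1+sin1 = 1.1759, and cos3+sin3 = 1.3871; these sum to 3.6246, so 255*3.6246 = 924.3 < 925 and 2040*3.6246 = 7394.2 < 7395, matching the bounds used in fdctCols and fdctRows above.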
|
||||
|
||||
// idct implements the inverse DCT.
|
||||
// Inputs are UQ8.0; outputs are Q10.3.
|
||||
func idct(b *block) {
|
||||
// A 2D IDCT is a 1D IDCT on rows followed by columns.
|
||||
idctRows(b)
|
||||
idctCols(b)
|
||||
}
|
||||
|
||||
// idctRows applies the 1D IDCT to the rows of b.
|
||||
// Inputs are UQ8.0; outputs are Q9.20.
|
||||
func idctRows(b *block) {
|
||||
for i := range 8 {
|
||||
x := b[8*i : 8*i+8 : 8*i+8]
|
||||
x0 := x[0]
|
||||
x7 := x[1]
|
||||
x2 := x[2]
|
||||
x5 := x[3]
|
||||
x1 := x[4]
|
||||
x6 := x[5]
|
||||
x3 := x[6]
|
||||
x4 := x[7]
|
||||
|
||||
// Run FDCT backward.
|
||||
// Independent operations have been reordered somewhat
|
||||
// to make precision tracking easier.
|
||||
//
|
||||
// Note that “x0, x1 = x0+x1, x0-x1” is now a reverse butterfly
|
||||
// and carries with it an implicit divide by two: the extra bit
|
||||
// is added to the precision, not the value size.
|
||||
|
||||
// x[01234567] are UQ8.0 in [0, 255].
|
||||
|
||||
// Stages 4, 3, 2: x0, x1, x2, x3.
|
||||
|
||||
x0 <<= 17
|
||||
x1 <<= 17
|
||||
// x0, x1 now UQ8.17.
|
||||
x0, x1 = x0+x1, x0-x1
|
||||
// x0 now UQ8.18 in [0, 255].
|
||||
// x1 now Q7.18 in [-127½, 127½].
|
||||
|
||||
// Note: (1/sqrt 2)*((cos 6*pi/16)+(sin 6*pi/16)) < 0.924, so no new high bit.
|
||||
x2, x3 = dctBox(x2, x3, c(sqrt2inv_cos6, 18), -c(sqrt2inv_sin6, 18))
|
||||
// x[23] now Q8.18 in [-236, 236].
|
||||
x1, x2 = x1+x2, x1-x2
|
||||
x0, x3 = x0+x3, x0-x3
|
||||
// x[0123] now Q8.19 in [-246, 246].
|
||||
|
||||
// Stages 4, 3, 2: x4, x5, x6, x7.
|
||||
|
||||
x4 <<= 7
|
||||
x7 <<= 7
|
||||
// x[47] now UQ8.7
|
||||
x7, x4 = x7+x4, x7-x4
|
||||
// x7 now UQ8.8 in [0, 255].
|
||||
// x4 now Q7.8 in [-127½, 127½].
|
||||
|
||||
x6 = x6 * c(sqrt2inv, 8)
|
||||
x5 = x5 * c(sqrt2inv, 8)
|
||||
// x[56] now UQ8.8 in [0, 181].
|
||||
// Note that 1/√2 has five 0s in its binary representation after
|
||||
// the 8th bit, so this multiply is actually producing 12 bits of precision.
|
||||
|
||||
x7, x5 = x7+x5, x7-x5
|
||||
x4, x6 = x4+x6, x4-x6
|
||||
// x[4567] now Q8.9 in [-218, 218].
|
||||
|
||||
x4, x7 = dctBox(x4>>2, x7>>2, c(cos3, 12), -c(sin3, 12))
|
||||
x5, x6 = dctBox(x5>>2, x6>>2, c(cos1, 12), -c(sin1, 12))
|
||||
// x[4567] now Q9.19 in [-303, 303].
|
||||
|
||||
// Stage 1.
|
||||
|
||||
x0, x7 = x0+x7, x0-x7
|
||||
x1, x6 = x1+x6, x1-x6
|
||||
x2, x5 = x2+x5, x2-x5
|
||||
x3, x4 = x3+x4, x3-x4
|
||||
// x[01234567] now Q9.20 in [-275, 275].
|
||||
|
||||
// Note: we don't need all 20 bits of “precision”,
|
||||
// but it is faster to let idctCols shift it away as part
|
||||
// of other operations rather than downshift here.
|
||||
|
||||
x[0] = x0
|
||||
x[1] = x1
|
||||
x[2] = x2
|
||||
x[3] = x3
|
||||
x[4] = x4
|
||||
x[5] = x5
|
||||
x[6] = x6
|
||||
x[7] = x7
|
||||
}
|
||||
}
|
||||
|
||||
// idctCols applies the 1D IDCT to the columns of b.
|
||||
// Inputs are Q9.20.
|
||||
// Outputs are Q10.3. That is, the result is the IDCT*8.
|
||||
func idctCols(b *block) {
|
||||
for i := range 8 {
|
||||
x0 := b[0*8+i]
|
||||
x7 := b[1*8+i]
|
||||
x2 := b[2*8+i]
|
||||
x5 := b[3*8+i]
|
||||
x1 := b[4*8+i]
|
||||
x6 := b[5*8+i]
|
||||
x3 := b[6*8+i]
|
||||
x4 := b[7*8+i]
|
||||
|
||||
// x[01234567] are Q9.20.
|
||||
|
||||
// Start by adding 0.5 to x0 (the incoming DC signal).
|
||||
// The butterflies will add it to all the other values,
|
||||
// and then the final shifts will round properly.
|
||||
x0 += 1 << 19
|
||||
|
||||
// Stages 4, 3, 2: x0, x1, x2, x3.
|
||||
|
||||
x0, x1 = (x0+x1)>>2, (x0-x1)>>2
|
||||
// x[01] now Q9.19.
|
||||
// Note: (1/sqrt 2)*((cos 6*pi/16)+(sin 6*pi/16)) < 1, so no new high bit.
|
||||
x2, x3 = dctBox(x2>>13, x3>>13, c(sqrt2inv_cos6, 12), -c(sqrt2inv_sin6, 12))
|
||||
// x[0123] now Q9.19.
|
||||
|
||||
x1, x2 = x1+x2, x1-x2
|
||||
x0, x3 = x0+x3, x0-x3
|
||||
// x[0123] now Q9.20.
|
||||
|
||||
// Stages 4, 3, 2: x4, x5, x6, x7.
|
||||
|
||||
x7, x4 = x7+x4, x7-x4
|
||||
// x[47] now Q9.21.
|
||||
|
||||
x5 = (x5 >> 13) * c(sqrt2inv, 14)
|
||||
x6 = (x6 >> 13) * c(sqrt2inv, 14)
|
||||
// x[56] now Q9.21.
|
||||
|
||||
x7, x5 = x7+x5, x7-x5
|
||||
x4, x6 = x4+x6, x4-x6
|
||||
// x[4567] now Q9.22.
|
||||
|
||||
x4, x7 = dctBox(x4>>14, x7>>14, c(cos3, 12), -c(sin3, 12))
|
||||
x5, x6 = dctBox(x5>>14, x6>>14, c(cos1, 12), -c(sin1, 12))
|
||||
// x[4567] now Q10.20.
|
||||
|
||||
x0, x7 = x0+x7, x0-x7
|
||||
x1, x6 = x1+x6, x1-x6
|
||||
x2, x5 = x2+x5, x2-x5
|
||||
x3, x4 = x3+x4, x3-x4
|
||||
// x[01234567] now Q10.21.
|
||||
|
||||
x0 >>= 18
|
||||
x1 >>= 18
|
||||
x2 >>= 18
|
||||
x3 >>= 18
|
||||
x4 >>= 18
|
||||
x5 >>= 18
|
||||
x6 >>= 18
|
||||
x7 >>= 18
|
||||
// x[01234567] now Q10.3.
|
||||
|
||||
b[0*8+i] = x0
|
||||
b[1*8+i] = x1
|
||||
b[2*8+i] = x2
|
||||
b[3*8+i] = x3
|
||||
b[4*8+i] = x4
|
||||
b[5*8+i] = x5
|
||||
b[6*8+i] = x6
|
||||
b[7*8+i] = x7
|
||||
}
|
||||
}
|
||||
|
|
@ -7,20 +7,18 @@ package jpeg
|
|||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"math/big"
|
||||
"math/rand"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func benchmarkDCT(b *testing.B, f func(*block)) {
|
||||
b.StopTimer()
|
||||
blocks := make([]block, 0, b.N*len(testBlocks))
|
||||
for i := 0; i < b.N; i++ {
|
||||
blocks = append(blocks, testBlocks[:]...)
|
||||
}
|
||||
b.StartTimer()
|
||||
for i := range blocks {
|
||||
f(&blocks[i])
|
||||
var blk block // avoid potential allocation in loop
|
||||
for b.Loop() {
|
||||
for _, blk = range testBlocks {
|
||||
f(&blk)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -32,11 +30,37 @@ func BenchmarkIDCT(b *testing.B) {
|
|||
benchmarkDCT(b, idct)
|
||||
}
|
||||
|
||||
const testSlowVsBig = true
|
||||
|
||||
func TestDCT(t *testing.T) {
|
||||
blocks := make([]block, len(testBlocks))
|
||||
copy(blocks, testBlocks[:])
|
||||
|
||||
// Append some randomly generated blocks of varying sparseness.
|
||||
// All zeros
|
||||
blocks = append(blocks, block{})
|
||||
|
||||
// Every possible unit impulse.
|
||||
for i := range blockSize {
|
||||
var b block
|
||||
b[i] = 255
|
||||
blocks = append(blocks, b)
|
||||
}
|
||||
|
||||
// All ones.
|
||||
var ones block
|
||||
for i := range ones {
|
||||
ones[i] = 255
|
||||
}
|
||||
blocks = append(blocks, ones)
|
||||
|
||||
// Every possible inverted unit impulse.
|
||||
for i := range blockSize {
|
||||
ones[i] = 0
|
||||
blocks = append(blocks, ones)
|
||||
ones[i] = 255
|
||||
}
|
||||
|
||||
// Some randomly generated blocks of varying sparseness.
|
||||
r := rand.New(rand.NewSource(123))
|
||||
for i := 0; i < 100; i++ {
|
||||
b := block{}
|
||||
|
|
@ -47,61 +71,84 @@ func TestDCT(t *testing.T) {
|
|||
blocks = append(blocks, b)
|
||||
}
|
||||
|
||||
// Check that the FDCT and IDCT functions are inverses, after a scale and
|
||||
// level shift. Scaling reduces the rounding errors in the conversion from
|
||||
// floats to ints.
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
for j := range got {
|
||||
got[j] = (got[j] - 128) * 8
|
||||
}
|
||||
slowFDCT(&got)
|
||||
slowIDCT(&got)
|
||||
for j := range got {
|
||||
got[j] = got[j]/8 + 128
|
||||
}
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: IDCT(FDCT)\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
// Check that the slow FDCT and IDCT functions are inverses,
|
||||
// after a scale and level shift.
|
||||
// Scaling reduces the rounding errors in the conversion.
|
||||
// The “fast” ones are not inverses because the fast IDCT
|
||||
// is optimized for 8-bit inputs, not full 16-bit ones.
|
||||
slowRoundTrip := func(b *block) {
|
||||
slowFDCT(b)
|
||||
slowIDCT(b)
|
||||
for j := range b {
|
||||
b[j] = b[j]/8 + 128
|
||||
}
|
||||
}
|
||||
nop := func(*block) {}
|
||||
testDCT(t, "IDCT(FDCT)", blocks, slowRoundTrip, nop, 1, 8)
|
||||
|
||||
if testSlowVsBig {
|
||||
testDCT(t, "slowFDCT", blocks, slowFDCT, slowerFDCT, 0, 64)
|
||||
testDCT(t, "slowIDCT", blocks, slowIDCT, slowerIDCT, 0, 64)
|
||||
}
|
||||
|
||||
// Check that the optimized and slow FDCT implementations agree.
|
||||
// The fdct function already does a scale and level shift.
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
fdct(&got)
|
||||
for j := range want {
|
||||
want[j] = (want[j] - 128) * 8
|
||||
}
|
||||
slowFDCT(&want)
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: FDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
}
|
||||
}
|
||||
|
||||
// Check that the optimized and slow IDCT implementations agree.
|
||||
for i, b := range blocks {
|
||||
got, want := b, b
|
||||
idct(&got)
|
||||
slowIDCT(&want)
|
||||
if differ(&got, &want) {
|
||||
t.Errorf("i=%d: IDCT\nsrc\n%s\ngot\n%s\nwant\n%s\n", i, &b, &got, &want)
|
||||
}
|
||||
}
|
||||
testDCT(t, "FDCT", blocks, fdct, slowFDCT, 1, 8)
|
||||
testDCT(t, "IDCT", blocks, idct, slowIDCT, 1, 8)
|
||||
}
|
||||
|
||||
// differ reports whether any pair-wise elements in b0 and b1 differ by 2 or
|
||||
// more. That tolerance is because there isn't a single definitive decoding of
|
||||
// a given JPEG image, even before the YCbCr to RGB conversion; implementations
|
||||
func testDCT(t *testing.T, name string, blocks []block, fhave, fwant func(*block), tolerance int32, maxCloseCalls int) {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
totalClose := 0
|
||||
for i, b := range blocks {
|
||||
have, want := b, b
|
||||
fhave(&have)
|
||||
fwant(&want)
|
||||
d, n := differ(&have, &want, tolerance)
|
||||
if d >= 0 || n > maxCloseCalls {
|
||||
fail := ""
|
||||
if d >= 0 {
|
||||
fail = fmt.Sprintf("diff at %d,%d", d/8, d%8)
|
||||
}
|
||||
if n > maxCloseCalls {
|
||||
if fail != "" {
|
||||
fail += "; "
|
||||
}
|
||||
fail += fmt.Sprintf("%d close calls", n)
|
||||
}
|
||||
t.Errorf("i=%d: %s (%s)\nsrc\n%s\nhave\n%s\nwant\n%s\n",
|
||||
i, name, fail, &b, &have, &want)
|
||||
}
|
||||
totalClose += n
|
||||
}
|
||||
if tolerance > 0 {
|
||||
t.Logf("%d/%d total close calls", totalClose, len(blocks)*blockSize)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// differ returns the index of the first pair-wise elements in b0 and b1
|
||||
// that differ by more than 'ok', along with the total number of elements
|
||||
// that differ by at least ok ("close calls").
|
||||
//
|
||||
// There isn't a single definitive decoding of a given JPEG image,
|
||||
// even before the YCbCr to RGB conversion; implementations
|
||||
// can have different IDCT rounding errors.
|
||||
func differ(b0, b1 *block) bool {
|
||||
//
|
||||
// If there are no differences, differ returns -1, 0.
|
||||
func differ(b0, b1 *block, ok int32) (index, closeCalls int) {
|
||||
index = -1
|
||||
for i := range b0 {
|
||||
delta := b0[i] - b1[i]
|
||||
if delta < -2 || +2 < delta {
|
||||
return true
|
||||
if delta < -ok || ok < delta {
|
||||
if index < 0 {
|
||||
index = i
|
||||
}
|
||||
}
|
||||
if delta <= -ok || ok <= delta {
|
||||
closeCalls++
|
||||
}
|
||||
}
|
||||
return false
|
||||
return
|
||||
}
|
||||
|
||||
// alpha returns 1 if i is 0 and returns √2 otherwise.
|
||||
|
|
@ -112,6 +159,14 @@ func alpha(i int) float64 {
|
|||
return math.Sqrt2
|
||||
}
|
||||
|
||||
// bigAlpha returns 1 if i is 0 and returns √2 otherwise.
|
||||
func bigAlpha(i int) *big.Float {
|
||||
if i == 0 {
|
||||
return bigFloat1
|
||||
}
|
||||
    return bigFloatSqrt2
}

var cosines = [32]float64{
    +1.0000000000000000000000000000000000000000000000000000000000000000, // cos(π/16 * 0)
    +0.9807852804032304491261822361342390369739337308933360950029160885, // cos(π/16 * 1)

@@ -150,6 +205,57 @@ var cosines = [32]float64{
     +0.9807852804032304491261822361342390369739337308933360950029160885, // cos(π/16 * 31)
 }
+
+func bigFloat(s string) *big.Float {
+    f, ok := new(big.Float).SetString(s)
+    if !ok {
+        panic("bad float")
+    }
+    return f
+}
+
+var (
+    bigFloat1     = big.NewFloat(1)
+    bigFloatSqrt2 = bigFloat("1.41421356237309504880168872420969807856967187537694807317667974")
+)
+
+var bigCosines = [32]*big.Float{
+    bigFloat("+1.0000000000000000000000000000000000000000000000000000000000000000"), // cos(π/16 * 0)
+    bigFloat("+0.9807852804032304491261822361342390369739337308933360950029160885"), // cos(π/16 * 1)
+    bigFloat("+0.9238795325112867561281831893967882868224166258636424861150977312"), // cos(π/16 * 2)
+    bigFloat("+0.8314696123025452370787883776179057567385608119872499634461245902"), // cos(π/16 * 3)
+    bigFloat("+0.7071067811865475244008443621048490392848359376884740365883398689"), // cos(π/16 * 4)
+    bigFloat("+0.5555702330196022247428308139485328743749371907548040459241535282"), // cos(π/16 * 5)
+    bigFloat("+0.3826834323650897717284599840303988667613445624856270414338006356"), // cos(π/16 * 6)
+    bigFloat("+0.1950903220161282678482848684770222409276916177519548077545020894"), // cos(π/16 * 7)
+
+    bigFloat("-0.0000000000000000000000000000000000000000000000000000000000000000"), // cos(π/16 * 8)
+    bigFloat("-0.1950903220161282678482848684770222409276916177519548077545020894"), // cos(π/16 * 9)
+    bigFloat("-0.3826834323650897717284599840303988667613445624856270414338006356"), // cos(π/16 * 10)
+    bigFloat("-0.5555702330196022247428308139485328743749371907548040459241535282"), // cos(π/16 * 11)
+    bigFloat("-0.7071067811865475244008443621048490392848359376884740365883398689"), // cos(π/16 * 12)
+    bigFloat("-0.8314696123025452370787883776179057567385608119872499634461245902"), // cos(π/16 * 13)
+    bigFloat("-0.9238795325112867561281831893967882868224166258636424861150977312"), // cos(π/16 * 14)
+    bigFloat("-0.9807852804032304491261822361342390369739337308933360950029160885"), // cos(π/16 * 15)
+
+    bigFloat("-1.0000000000000000000000000000000000000000000000000000000000000000"), // cos(π/16 * 16)
+    bigFloat("-0.9807852804032304491261822361342390369739337308933360950029160885"), // cos(π/16 * 17)
+    bigFloat("-0.9238795325112867561281831893967882868224166258636424861150977312"), // cos(π/16 * 18)
+    bigFloat("-0.8314696123025452370787883776179057567385608119872499634461245902"), // cos(π/16 * 19)
+    bigFloat("-0.7071067811865475244008443621048490392848359376884740365883398689"), // cos(π/16 * 20)
+    bigFloat("-0.5555702330196022247428308139485328743749371907548040459241535282"), // cos(π/16 * 21)
+    bigFloat("-0.3826834323650897717284599840303988667613445624856270414338006356"), // cos(π/16 * 22)
+    bigFloat("-0.1950903220161282678482848684770222409276916177519548077545020894"), // cos(π/16 * 23)
+
+    bigFloat("+0.0000000000000000000000000000000000000000000000000000000000000000"), // cos(π/16 * 24)
+    bigFloat("+0.1950903220161282678482848684770222409276916177519548077545020894"), // cos(π/16 * 25)
+    bigFloat("+0.3826834323650897717284599840303988667613445624856270414338006356"), // cos(π/16 * 26)
+    bigFloat("+0.5555702330196022247428308139485328743749371907548040459241535282"), // cos(π/16 * 27)
+    bigFloat("+0.7071067811865475244008443621048490392848359376884740365883398689"), // cos(π/16 * 28)
+    bigFloat("+0.8314696123025452370787883776179057567385608119872499634461245902"), // cos(π/16 * 29)
+    bigFloat("+0.9238795325112867561281831893967882868224166258636424861150977312"), // cos(π/16 * 30)
+    bigFloat("+0.9807852804032304491261822361342390369739337308933360950029160885"), // cos(π/16 * 31)
+}
+
 // slowFDCT performs the 8*8 2-dimensional forward discrete cosine transform:
 //
 // dst[u,v] = (1/8) * Σ_x Σ_y alpha(u) * alpha(v) * src[x,y] *

@@ -160,24 +266,51 @@ var cosines = [32]float64{
 //
 // b acts as both dst and src.
 func slowFDCT(b *block) {
-    var dst [blockSize]float64
+    var dst block
     for v := 0; v < 8; v++ {
        for u := 0; u < 8; u++ {
            sum := 0.0
            for y := 0; y < 8; y++ {
                for x := 0; x < 8; x++ {
-                   sum += alpha(u) * alpha(v) * float64(b[8*y+x]) *
+                   sum += alpha(u) * alpha(v) * float64(b[8*y+x]-128) *
                        cosines[((2*x+1)*u)%32] *
                        cosines[((2*y+1)*v)%32]
                }
            }
-           dst[8*v+u] = sum / 8
+           dst[8*v+u] = int32(math.Round(sum))
        }
     }
-    // Convert from float64 to int32.
-    for i := range dst {
-        b[i] = int32(dst[i] + 0.5)
-    }
+    *b = dst
 }
+
+// slowerFDCT is slowFDCT but using big.Floats to validate slowFDCT.
+func slowerFDCT(b *block) {
+    var dst block
+    for v := 0; v < 8; v++ {
+        for u := 0; u < 8; u++ {
+            sum := big.NewFloat(0)
+            for y := 0; y < 8; y++ {
+                for x := 0; x < 8; x++ {
+                    f := big.NewFloat(float64(b[8*y+x] - 128))
+                    f = new(big.Float).Mul(f, bigAlpha(u))
+                    f = new(big.Float).Mul(f, bigAlpha(v))
+                    f = new(big.Float).Mul(f, bigCosines[((2*x+1)*u)%32])
+                    f = new(big.Float).Mul(f, bigCosines[((2*y+1)*v)%32])
+                    sum = new(big.Float).Add(sum, f)
+                }
+            }
+            // Int64 truncates toward zero, so add ±0.5
+            // as needed to round
+            if sum.Sign() > 0 {
+                sum = new(big.Float).Add(sum, big.NewFloat(+0.5))
+            } else {
+                sum = new(big.Float).Add(sum, big.NewFloat(-0.5))
+            }
+            i, _ := sum.Int64()
+            dst[8*v+u] = int32(i)
+        }
+    }
+    *b = dst
+}
+
 // slowIDCT performs the 8*8 2-dimensional inverse discrete cosine transform:

@@ -190,7 +323,7 @@ func slowFDCT(b *block) {
 //
 // b acts as both dst and src.
 func slowIDCT(b *block) {
-    var dst [blockSize]float64
+    var dst block
     for y := 0; y < 8; y++ {
        for x := 0; x < 8; x++ {
            sum := 0.0

@@ -201,13 +334,41 @@ func slowIDCT(b *block) {
                    cosines[((2*y+1)*v)%32]
                }
            }
-           dst[8*y+x] = sum / 8
+           dst[8*y+x] = int32(math.Round(sum / 8))
        }
     }
-    // Convert from float64 to int32.
-    for i := range dst {
-        b[i] = int32(dst[i] + 0.5)
-    }
+    *b = dst
 }
+
+// slowerIDCT is slowIDCT but using big.Floats to validate slowIDCT.
+func slowerIDCT(b *block) {
+    var dst block
+    for y := 0; y < 8; y++ {
+        for x := 0; x < 8; x++ {
+            sum := big.NewFloat(0)
+            for v := 0; v < 8; v++ {
+                for u := 0; u < 8; u++ {
+                    f := big.NewFloat(float64(b[8*v+u]))
+                    f = new(big.Float).Mul(f, bigAlpha(u))
+                    f = new(big.Float).Mul(f, bigAlpha(v))
+                    f = new(big.Float).Mul(f, bigCosines[((2*x+1)*u)%32])
+                    f = new(big.Float).Mul(f, bigCosines[((2*y+1)*v)%32])
+                    f = new(big.Float).Quo(f, big.NewFloat(8))
+                    sum = new(big.Float).Add(sum, f)
+                }
+            }
+            // Int64 truncates toward zero, so add ±0.5
+            // as needed to round
+            if sum.Sign() > 0 {
+                sum = new(big.Float).Add(sum, big.NewFloat(+0.5))
+            } else {
+                sum = new(big.Float).Add(sum, big.NewFloat(-0.5))
+            }
+            i, _ := sum.Int64()
+            dst[8*y+x] = int32(i)
+        }
+    }
+    *b = dst
+}
+
 func (b *block) String() string {
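
Note on the rounding idiom in slowerFDCT and slowerIDCT above: big.Float.Int64 truncates toward zero, so the code adds +0.5 or -0.5, matching the sign of the sum, to get round-half-away-from-zero. A minimal standalone sketch of the same idiom (an illustration, not part of this commit):

package main

import (
    "fmt"
    "math/big"
)

// roundBig rounds f to the nearest int64, halves away from zero, by
// adding ±0.5 before the truncating Int64 conversion.
func roundBig(f *big.Float) int64 {
    half := big.NewFloat(0.5)
    if f.Sign() < 0 {
        half = big.NewFloat(-0.5)
    }
    i, _ := new(big.Float).Add(f, half).Int64()
    return i
}

func main() {
    fmt.Println(roundBig(big.NewFloat(2.4)))  // 2
    fmt.Println(roundBig(big.NewFloat(2.5)))  // 3
    fmt.Println(roundBig(big.NewFloat(-2.5))) // -3
}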

@@ -1,192 +0,0 @@
-// Copyright 2011 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package jpeg
-
-// This file implements a Forward Discrete Cosine Transformation.
-
-/*
-It is based on the code in jfdctint.c from the Independent JPEG Group,
-found at http://www.ijg.org/files/jpegsrc.v8c.tar.gz.
-
-The "LEGAL ISSUES" section of the README in that archive says:
-
-In plain English:
-
-1. We don't promise that this software works. (But if you find any bugs,
-   please let us know!)
-2. You can use this software for whatever you want. You don't have to pay us.
-3. You may not pretend that you wrote this software. If you use it in a
-   program, you must acknowledge somewhere in your documentation that
-   you've used the IJG code.
-
-In legalese:
-
-The authors make NO WARRANTY or representation, either express or implied,
-with respect to this software, its quality, accuracy, merchantability, or
-fitness for a particular purpose. This software is provided "AS IS", and you,
-its user, assume the entire risk as to its quality and accuracy.
-
-This software is copyright (C) 1991-2011, Thomas G. Lane, Guido Vollbeding.
-All Rights Reserved except as specified below.
-
-Permission is hereby granted to use, copy, modify, and distribute this
-software (or portions thereof) for any purpose, without fee, subject to these
-conditions:
-(1) If any part of the source code for this software is distributed, then this
-README file must be included, with this copyright and no-warranty notice
-unaltered; and any additions, deletions, or changes to the original files
-must be clearly indicated in accompanying documentation.
-(2) If only executable code is distributed, then the accompanying
-documentation must state that "this software is based in part on the work of
-the Independent JPEG Group".
-(3) Permission for use of this software is granted only if the user accepts
-full responsibility for any undesirable consequences; the authors accept
-NO LIABILITY for damages of any kind.
-
-These conditions apply to any software derived from or based on the IJG code,
-not just to the unmodified library. If you use our work, you ought to
-acknowledge us.
-
-Permission is NOT granted for the use of any IJG author's name or company name
-in advertising or publicity relating to this software or products derived from
-it. This software may be referred to only as "the Independent JPEG Group's
-software".
-
-We specifically permit and encourage the use of this software as the basis of
-commercial products, provided that all warranty or liability claims are
-assumed by the product vendor.
-*/
-
-// Trigonometric constants in 13-bit fixed point format.
-const (
-    fix_0_298631336 = 2446
-    fix_0_390180644 = 3196
-    fix_0_541196100 = 4433
-    fix_0_765366865 = 6270
-    fix_0_899976223 = 7373
-    fix_1_175875602 = 9633
-    fix_1_501321110 = 12299
-    fix_1_847759065 = 15137
-    fix_1_961570560 = 16069
-    fix_2_053119869 = 16819
-    fix_2_562915447 = 20995
-    fix_3_072711026 = 25172
-)
-
-const (
-    constBits     = 13
-    pass1Bits     = 2
-    centerJSample = 128
-)
-
-// fdct performs a forward DCT on an 8x8 block of coefficients, including a
-// level shift.
-func fdct(b *block) {
-    // Pass 1: process rows.
-    for y := 0; y < 8; y++ {
-        y8 := y * 8
-        s := b[y8 : y8+8 : y8+8] // Small cap improves performance, see https://golang.org/issue/27857
-        x0 := s[0]
-        x1 := s[1]
-        x2 := s[2]
-        x3 := s[3]
-        x4 := s[4]
-        x5 := s[5]
-        x6 := s[6]
-        x7 := s[7]
-
-        tmp0 := x0 + x7
-        tmp1 := x1 + x6
-        tmp2 := x2 + x5
-        tmp3 := x3 + x4
-
-        tmp10 := tmp0 + tmp3
-        tmp12 := tmp0 - tmp3
-        tmp11 := tmp1 + tmp2
-        tmp13 := tmp1 - tmp2
-
-        tmp0 = x0 - x7
-        tmp1 = x1 - x6
-        tmp2 = x2 - x5
-        tmp3 = x3 - x4
-
-        s[0] = (tmp10 + tmp11 - 8*centerJSample) << pass1Bits
-        s[4] = (tmp10 - tmp11) << pass1Bits
-        z1 := (tmp12 + tmp13) * fix_0_541196100
-        z1 += 1 << (constBits - pass1Bits - 1)
-        s[2] = (z1 + tmp12*fix_0_765366865) >> (constBits - pass1Bits)
-        s[6] = (z1 - tmp13*fix_1_847759065) >> (constBits - pass1Bits)
-
-        tmp10 = tmp0 + tmp3
-        tmp11 = tmp1 + tmp2
-        tmp12 = tmp0 + tmp2
-        tmp13 = tmp1 + tmp3
-        z1 = (tmp12 + tmp13) * fix_1_175875602
-        z1 += 1 << (constBits - pass1Bits - 1)
-        tmp0 *= fix_1_501321110
-        tmp1 *= fix_3_072711026
-        tmp2 *= fix_2_053119869
-        tmp3 *= fix_0_298631336
-        tmp10 *= -fix_0_899976223
-        tmp11 *= -fix_2_562915447
-        tmp12 *= -fix_0_390180644
-        tmp13 *= -fix_1_961570560
-
-        tmp12 += z1
-        tmp13 += z1
-        s[1] = (tmp0 + tmp10 + tmp12) >> (constBits - pass1Bits)
-        s[3] = (tmp1 + tmp11 + tmp13) >> (constBits - pass1Bits)
-        s[5] = (tmp2 + tmp11 + tmp12) >> (constBits - pass1Bits)
-        s[7] = (tmp3 + tmp10 + tmp13) >> (constBits - pass1Bits)
-    }
-    // Pass 2: process columns.
-    // We remove pass1Bits scaling, but leave results scaled up by an overall factor of 8.
-    for x := 0; x < 8; x++ {
-        tmp0 := b[0*8+x] + b[7*8+x]
-        tmp1 := b[1*8+x] + b[6*8+x]
-        tmp2 := b[2*8+x] + b[5*8+x]
-        tmp3 := b[3*8+x] + b[4*8+x]
-
-        tmp10 := tmp0 + tmp3 + 1<<(pass1Bits-1)
-        tmp12 := tmp0 - tmp3
-        tmp11 := tmp1 + tmp2
-        tmp13 := tmp1 - tmp2
-
-        tmp0 = b[0*8+x] - b[7*8+x]
-        tmp1 = b[1*8+x] - b[6*8+x]
-        tmp2 = b[2*8+x] - b[5*8+x]
-        tmp3 = b[3*8+x] - b[4*8+x]
-
-        b[0*8+x] = (tmp10 + tmp11) >> pass1Bits
-        b[4*8+x] = (tmp10 - tmp11) >> pass1Bits
-
-        z1 := (tmp12 + tmp13) * fix_0_541196100
-        z1 += 1 << (constBits + pass1Bits - 1)
-        b[2*8+x] = (z1 + tmp12*fix_0_765366865) >> (constBits + pass1Bits)
-        b[6*8+x] = (z1 - tmp13*fix_1_847759065) >> (constBits + pass1Bits)
-
-        tmp10 = tmp0 + tmp3
-        tmp11 = tmp1 + tmp2
-        tmp12 = tmp0 + tmp2
-        tmp13 = tmp1 + tmp3
-        z1 = (tmp12 + tmp13) * fix_1_175875602
-        z1 += 1 << (constBits + pass1Bits - 1)
-        tmp0 *= fix_1_501321110
-        tmp1 *= fix_3_072711026
-        tmp2 *= fix_2_053119869
-        tmp3 *= fix_0_298631336
-        tmp10 *= -fix_0_899976223
-        tmp11 *= -fix_2_562915447
-        tmp12 *= -fix_0_390180644
-        tmp13 *= -fix_1_961570560
-
-        tmp12 += z1
-        tmp13 += z1
-        b[1*8+x] = (tmp0 + tmp10 + tmp12) >> (constBits + pass1Bits)
-        b[3*8+x] = (tmp1 + tmp11 + tmp13) >> (constBits + pass1Bits)
-        b[5*8+x] = (tmp2 + tmp11 + tmp12) >> (constBits + pass1Bits)
-        b[7*8+x] = (tmp3 + tmp10 + tmp13) >> (constBits + pass1Bits)
-    }
-}

@@ -1,194 +0,0 @@
-// Copyright 2009 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package jpeg
-
-// This is a Go translation of idct.c from
-//
-// http://standards.iso.org/ittf/PubliclyAvailableStandards/ISO_IEC_13818-4_2004_Conformance_Testing/Video/verifier/mpeg2decode_960109.tar.gz
-//
-// which carries the following notice:
-
-/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
-
-/*
- * Disclaimer of Warranty
- *
- * These software programs are available to the user without any license fee or
- * royalty on an "as is" basis. The MPEG Software Simulation Group disclaims
- * any and all warranties, whether express, implied, or statuary, including any
- * implied warranties or merchantability or of fitness for a particular
- * purpose. In no event shall the copyright-holder be liable for any
- * incidental, punitive, or consequential damages of any kind whatsoever
- * arising from the use of these programs.
- *
- * This disclaimer of warranty extends to the user of these programs and user's
- * customers, employees, agents, transferees, successors, and assigns.
- *
- * The MPEG Software Simulation Group does not represent or warrant that the
- * programs furnished hereunder are free of infringement of any third-party
- * patents.
- *
- * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
- * are subject to royalty fees to patent holders. Many of these patents are
- * general enough such that they are unavoidable regardless of implementation
- * design.
- *
- */
-
-const blockSize = 64 // A DCT block is 8x8.
-
-type block [blockSize]int32
-
-const (
-    w1 = 2841 // 2048*sqrt(2)*cos(1*pi/16)
-    w2 = 2676 // 2048*sqrt(2)*cos(2*pi/16)
-    w3 = 2408 // 2048*sqrt(2)*cos(3*pi/16)
-    w5 = 1609 // 2048*sqrt(2)*cos(5*pi/16)
-    w6 = 1108 // 2048*sqrt(2)*cos(6*pi/16)
-    w7 = 565  // 2048*sqrt(2)*cos(7*pi/16)
-
-    w1pw7 = w1 + w7
-    w1mw7 = w1 - w7
-    w2pw6 = w2 + w6
-    w2mw6 = w2 - w6
-    w3pw5 = w3 + w5
-    w3mw5 = w3 - w5
-
-    r2 = 181 // 256/sqrt(2)
-)
-
-// idct performs a 2-D Inverse Discrete Cosine Transformation.
-//
-// The input coefficients should already have been multiplied by the
-// appropriate quantization table. We use fixed-point computation, with the
-// number of bits for the fractional component varying over the intermediate
-// stages.
-//
-// For more on the actual algorithm, see Z. Wang, "Fast algorithms for the
-// discrete W transform and for the discrete Fourier transform", IEEE Trans. on
-// ASSP, Vol. ASSP- 32, pp. 803-816, Aug. 1984.
-func idct(src *block) {
-    // Horizontal 1-D IDCT.
-    for y := 0; y < 8; y++ {
-        y8 := y * 8
-        s := src[y8 : y8+8 : y8+8] // Small cap improves performance, see https://golang.org/issue/27857
-        // If all the AC components are zero, then the IDCT is trivial.
-        if s[1] == 0 && s[2] == 0 && s[3] == 0 &&
-            s[4] == 0 && s[5] == 0 && s[6] == 0 && s[7] == 0 {
-            dc := s[0] << 3
-            s[0] = dc
-            s[1] = dc
-            s[2] = dc
-            s[3] = dc
-            s[4] = dc
-            s[5] = dc
-            s[6] = dc
-            s[7] = dc
-            continue
-        }
-
-        // Prescale.
-        x0 := (s[0] << 11) + 128
-        x1 := s[4] << 11
-        x2 := s[6]
-        x3 := s[2]
-        x4 := s[1]
-        x5 := s[7]
-        x6 := s[5]
-        x7 := s[3]
-
-        // Stage 1.
-        x8 := w7 * (x4 + x5)
-        x4 = x8 + w1mw7*x4
-        x5 = x8 - w1pw7*x5
-        x8 = w3 * (x6 + x7)
-        x6 = x8 - w3mw5*x6
-        x7 = x8 - w3pw5*x7
-
-        // Stage 2.
-        x8 = x0 + x1
-        x0 -= x1
-        x1 = w6 * (x3 + x2)
-        x2 = x1 - w2pw6*x2
-        x3 = x1 + w2mw6*x3
-        x1 = x4 + x6
-        x4 -= x6
-        x6 = x5 + x7
-        x5 -= x7
-
-        // Stage 3.
-        x7 = x8 + x3
-        x8 -= x3
-        x3 = x0 + x2
-        x0 -= x2
-        x2 = (r2*(x4+x5) + 128) >> 8
-        x4 = (r2*(x4-x5) + 128) >> 8
-
-        // Stage 4.
-        s[0] = (x7 + x1) >> 8
-        s[1] = (x3 + x2) >> 8
-        s[2] = (x0 + x4) >> 8
-        s[3] = (x8 + x6) >> 8
-        s[4] = (x8 - x6) >> 8
-        s[5] = (x0 - x4) >> 8
-        s[6] = (x3 - x2) >> 8
-        s[7] = (x7 - x1) >> 8
-    }
-
-    // Vertical 1-D IDCT.
-    for x := 0; x < 8; x++ {
-        // Similar to the horizontal 1-D IDCT case, if all the AC components are zero, then the IDCT is trivial.
-        // However, after performing the horizontal 1-D IDCT, there are typically non-zero AC components, so
-        // we do not bother to check for the all-zero case.
-        s := src[x : x+57 : x+57] // Small cap improves performance, see https://golang.org/issue/27857
-
-        // Prescale.
-        y0 := (s[8*0] << 8) + 8192
-        y1 := s[8*4] << 8
-        y2 := s[8*6]
-        y3 := s[8*2]
-        y4 := s[8*1]
-        y5 := s[8*7]
-        y6 := s[8*5]
-        y7 := s[8*3]
-
-        // Stage 1.
-        y8 := w7*(y4+y5) + 4
-        y4 = (y8 + w1mw7*y4) >> 3
-        y5 = (y8 - w1pw7*y5) >> 3
-        y8 = w3*(y6+y7) + 4
-        y6 = (y8 - w3mw5*y6) >> 3
-        y7 = (y8 - w3pw5*y7) >> 3
-
-        // Stage 2.
-        y8 = y0 + y1
-        y0 -= y1
-        y1 = w6*(y3+y2) + 4
-        y2 = (y1 - w2pw6*y2) >> 3
-        y3 = (y1 + w2mw6*y3) >> 3
-        y1 = y4 + y6
-        y4 -= y6
-        y6 = y5 + y7
-        y5 -= y7
-
-        // Stage 3.
-        y7 = y8 + y3
-        y8 -= y3
-        y3 = y0 + y2
-        y0 -= y2
-        y2 = (r2*(y4+y5) + 128) >> 8
-        y4 = (r2*(y4-y5) + 128) >> 8
-
-        // Stage 4.
-        s[8*0] = (y7 + y1) >> 14
-        s[8*1] = (y3 + y2) >> 14
-        s[8*2] = (y0 + y4) >> 14
-        s[8*3] = (y8 + y6) >> 14
-        s[8*4] = (y8 - y6) >> 14
-        s[8*5] = (y0 - y4) >> 14
-        s[8*6] = (y3 - y2) >> 14
-        s[8*7] = (y7 - y1) >> 14
-    }
-}
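
A quick sanity check (an illustration, not part of this commit) of where the deleted file's fixed-point constants come from: each wN is round(2048*sqrt(2)*cos(N*pi/16)).

package main

import (
    "fmt"
    "math"
)

func main() {
    // Reproduces w1 = 2841, w2 = 2676, w3 = 2408, w5 = 1609, w6 = 1108, w7 = 565.
    for _, n := range []int{1, 2, 3, 5, 6, 7} {
        w := math.Round(2048 * math.Sqrt2 * math.Cos(float64(n)*math.Pi/16))
        fmt.Printf("w%d = %.0f\n", n, w)
    }
}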

@@ -154,8 +154,8 @@ func TestWriter(t *testing.T) {
            continue
        }
        // Compare the average delta to the tolerance level.
-       if averageDelta(m0, m1) > tc.tolerance {
-           t.Errorf("%s, quality=%d: average delta is too high", tc.filename, tc.quality)
+       if d := averageDelta(m0, m1); d > tc.tolerance {
+           t.Errorf("%s, quality=%d: average delta is too high (%d > %d)", tc.filename, tc.quality, d, tc.tolerance)
            continue
        }
    }

@@ -31,3 +31,11 @@ type NonEmptyInterface struct {
    ITab *ITab
    Data unsafe.Pointer
 }
+
+// CommonInterface describes the layout of both [EmptyInterface] and [NonEmptyInterface].
+type CommonInterface struct {
+   // Either an *ITab or a *Type, unexported to avoid accidental use.
+   _ unsafe.Pointer
+
+   Data unsafe.Pointer
+}

@@ -91,6 +91,8 @@ const (
    PageShift          = 13
    MaxObjsPerSpan     = 1024
    MaxSizeClassNPages = 10
+   TinySize           = 16
+   TinySizeClass      = 2
 )

 var SizeClassToSize = [NumSizeClasses]uint16{0, 8, 16, 24, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, 480, 512, 576, 640, 704, 768, 896, 1024, 1152, 1280, 1408, 1536, 1792, 2048, 2304, 2688, 3072, 3200, 3456, 4096, 4864, 5376, 6144, 6528, 6784, 6912, 8192, 9472, 9728, 10240, 10880, 12288, 13568, 14336, 16384, 18432, 19072, 20480, 21760, 24576, 27264, 28672, 32768}

src/internal/types/testdata/check/builtins0.go (vendored)
@@ -609,24 +609,48 @@ func min2() {
    )
 }

-func new1() {
-   _ = new()     // ERROR "not enough arguments"
-   _ = new(1, 2) // ERROR "too many arguments"
-   _ = new("foo" /* ERROR "not a type" */)
-   p := new(float64)
-   _ = new(struct{ x, y int })
-   q := new(*float64)
-   _ = *p == **q
-   new /* ERROR "not used" */ (int)
-   _ = &new /* ERROR "cannot take address" */ (int)
-   _ = new(int... /* ERROR "invalid use of ..." */ )
+func newInvalid() {
+   f2 := func() (x, y int) { return }
+
+   _ = new()     // ERROR "not enough arguments"
+   _ = new(1, 2) // ERROR "too many arguments"
+   new /* ERROR "not used" */ (int)
+   _ = &new /* ERROR "cannot take address" */ (int)
+   _ = new(int... /* ERROR "invalid use of ..." */)
+   _ = new(f0 /* ERROR "f0() (no value) used as value or type" */ ())
+   _ = new(len /* ERROR "len (built-in) must be called" */)
+   _ = new(1 /* ERROR "argument to new (overflows)" */ << 70)
+   _ = new(f2 /* ERRORx "multiple-value.*in single-value context" */ ())
 }

-func new2() {
+// new(T)
+func newType() {
+   _ = new(struct{ x, y int })
+
+   p := new(float64)
+   q := new(*float64)
+   _ = *p == **q
+}
+
+// new(expr), added in go1.26
+func newExpr() {
    f1 := func() (x []int) { return }
-   _ = new(f0 /* ERROR "not a type" */ ())
-   _ = new(f1 /* ERROR "not a type" */ ())
+   var (
+       _ *[]int        = new(f1())
+       _ *func() []int = new(f1)
+       _ *bool         = new(false)
+       _ *int          = new(123)
+       _ *float64      = new(1.0)
+       _ *uint         = new(uint(3))
+       _ *rune         = new('a')
+       _ *string       = new("A")
+       _ *struct{}     = new(struct{}{})
+       _ *any          = new(any)
+
+       // from issue 43125
+       _ = new(-1)
+       _ = new(1 + 1)
+   )
 }

 func panic1() {

src/internal/types/testdata/check/go1_25.go (vendored, new file)
@@ -0,0 +1,13 @@
+// -lang=go1.25
+
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Check Go language version-specific errors.
+
+//go:build go1.25
+
+package p
+
+var _ = new /* ERROR "new(expr) requires go1.26 or later" */ (123)
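
For orientation, a hypothetical snippet of the feature these tests exercise; it only compiles under a go1.26 toolchain implementing new(expr), and mirrors the accepted cases in newExpr above:

package main

import "fmt"

func main() {
    p := new(123)      // p has type *int; *p == 123
    s := new("gopher") // s has type *string
    u := new(uint(3))  // u has type *uint
    fmt.Println(*p, *s, *u)
}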

@@ -1,8 +0,0 @@
-// Copyright 2021 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package p
-
-var _ = new(- /* ERROR "not a type" */ 1)
-var _ = new(1 /* ERROR "not a type" */ + 1)

@@ -8783,6 +8783,9 @@ func TestTypeAssertAllocs(t *testing.T) {
    typeAssertAllocs[time.Time](t, ValueOf(new(time.Time)).Elem(), 0)
    typeAssertAllocs[time.Time](t, ValueOf(*new(time.Time)), 0)
+
+   type I interface{ foo() }
+   typeAssertAllocs[I](t, ValueOf(new(string)).Elem(), 0) // assert fail doesn't alloc
 }

 func typeAssertAllocs[T any](t *testing.T, val Value, wantAllocs int) {

@@ -120,10 +120,16 @@ func (v Value) pointer() unsafe.Pointer {

 // packEface converts v to the empty interface.
 func packEface(v Value) any {
+   return *(*any)(unsafe.Pointer(&abi.EmptyInterface{
+       Type: v.typ(),
+       Data: packEfaceData(v),
+   }))
+}
+
+// packEfaceData is a helper that packs the Data part of an interface,
+// if v were to be stored in an interface.
+func packEfaceData(v Value) unsafe.Pointer {
    t := v.typ()
-   // Declare e as a struct (and not pointer to struct) to help escape analysis.
-   e := abi.EmptyInterface{}
-   // First, fill in the data portion of the interface.
    switch {
    case !t.IsDirectIface():
        if v.flag&flagIndir == 0 {

@@ -136,24 +142,20 @@ func packEface(v Value) any {
            typedmemmove(t, c, ptr)
            ptr = c
        }
-       e.Data = ptr
+       return ptr
    case v.flag&flagIndir != 0:
        // Value is indirect, but interface is direct. We need
        // to load the data at v.ptr into the interface data word.
-       e.Data = *(*unsafe.Pointer)(v.ptr)
+       return *(*unsafe.Pointer)(v.ptr)
    default:
        // Value is direct, and so is the interface.
-       e.Data = v.ptr
+       return v.ptr
    }
-   // Now, fill in the type portion.
-   e.Type = t
-   return *(*any)(unsafe.Pointer(&e))
 }

 // unpackEface converts the empty interface i to a Value.
 func unpackEface(i any) Value {
    e := (*abi.EmptyInterface)(unsafe.Pointer(&i))
    // NOTE: don't read e.word until we know whether it is really a pointer or not.
    t := e.Type
    if t == nil {
        return Value{}

@@ -1544,8 +1546,18 @@ func TypeAssert[T any](v Value) (T, bool) {
    // TypeAssert[any](ValueOf(1)) == ValueOf(1).Interface().(any)
    // TypeAssert[error](ValueOf(&someError{})) == ValueOf(&someError{}).Interface().(error)
    if typ.Kind() == abi.Interface {
-       v, ok := packEface(v).(T)
-       return v, ok
+       // To avoid allocating memory in case the type assertion fails,
+       // first do the type assertion with a nil Data pointer.
+       iface := *(*any)(unsafe.Pointer(&abi.EmptyInterface{Type: v.typ(), Data: nil}))
+       if out, ok := iface.(T); ok {
+           // Now populate the Data field properly; we update the Data ptr
+           // directly to avoid an additional type assertion. We can re-use the
+           // itab we already got from the runtime (through the previous type assertion).
+           (*abi.CommonInterface)(unsafe.Pointer(&out)).Data = packEfaceData(v)
+           return out, true
+       }
+       var zero T
+       return zero, false
    }

    // Both v and T must be concrete types.
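
A self-contained sketch of the trick used in TypeAssert above, written outside reflect. The eface mirror struct and the name type are illustrative assumptions; the sketch relies only on the same two-word interface layout that the diff encodes in abi.CommonInterface, and is an illustration rather than how user code should be written:

package main

import (
    "fmt"
    "unsafe"
)

// eface mirrors the runtime's empty-interface layout: a type word
// followed by a data word. Non-empty interfaces have the same shape
// with an itab in place of the type word.
type eface struct {
    typ  unsafe.Pointer
    data unsafe.Pointer
}

type name string

func (n *name) String() string { return string(*n) }

func main() {
    n := name("gopher")
    probe := any(&n) // *name is pointer-shaped, so the data word is &n

    // Step 1: assert with a nil data word. Interface-to-interface
    // assertions consult only the type word, so no value is copied
    // or allocated while the outcome is still unknown.
    (*eface)(unsafe.Pointer(&probe)).data = nil
    if s, ok := probe.(fmt.Stringer); ok {
        // Step 2: the assertion succeeded, so patch the data word of
        // the result directly, reusing the itab the assertion produced.
        (*[2]unsafe.Pointer)(unsafe.Pointer(&s))[1] = unsafe.Pointer(&n)
        fmt.Println(s.String()) // gopher
    }
}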

src/runtime/_mkmalloc/constants.go (new file)
@@ -0,0 +1,29 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+const (
+   // Constants that we use and will transfer to the runtime.
+   minHeapAlign = 8
+   maxSmallSize = 32 << 10
+   smallSizeDiv = 8
+   smallSizeMax = 1024
+   largeSizeDiv = 128
+   pageShift    = 13
+   tinySize     = 16
+
+   // Derived constants.
+   pageSize = 1 << pageShift
+)
+
+const (
+   maxPtrSize = max(4, 8)
+   maxPtrBits = 8 * maxPtrSize
+
+   // Maximum size smallScanNoHeader would be called for, which is the
+   // maximum value gc.MinSizeForMallocHeader can have on any platform.
+   // gc.MinSizeForMallocHeader is defined as goarch.PtrSize * goarch.PtrBits.
+   smallScanNoHeaderMax = maxPtrSize * maxPtrBits
+)
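
These constants explain why the jump tables generated later in mkmalloc.go are declared as [513]func(...): sizes 0 through smallScanNoHeaderMax inclusive each need a slot. A quick arithmetic check (an illustration, not part of this commit):

package main

import "fmt"

func main() {
    const maxPtrSize = max(4, 8) // 8: the largest pointer size across platforms
    const maxPtrBits = 8 * maxPtrSize
    const smallScanNoHeaderMax = maxPtrSize * maxPtrBits
    fmt.Println(smallScanNoHeaderMax)     // 512
    fmt.Println(smallScanNoHeaderMax + 1) // 513 table entries, for sizes 0..512
}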

src/runtime/_mkmalloc/go.mod (new file)
@@ -0,0 +1,5 @@
+module runtime/_mkmalloc
+
+go 1.24
+
+require golang.org/x/tools v0.33.0

src/runtime/_mkmalloc/go.sum (new file)
@@ -0,0 +1,2 @@
+golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc=
+golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI=

src/runtime/_mkmalloc/mkmalloc.go (new file)
@@ -0,0 +1,605 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+   "bytes"
+   "flag"
+   "fmt"
+   "go/ast"
+   "go/format"
+   "go/parser"
+   "go/token"
+   "log"
+   "os"
+   "strings"
+
+   "golang.org/x/tools/go/ast/astutil"
+
+   internalastutil "runtime/_mkmalloc/astutil"
+)
+
+var stdout = flag.Bool("stdout", false, "write sizeclasses source to stdout instead of sizeclasses.go")
+
+func makeSizeToSizeClass(classes []class) []uint8 {
+   sc := uint8(0)
+   ret := make([]uint8, smallScanNoHeaderMax+1)
+   for i := range ret {
+       if i > classes[sc].size {
+           sc++
+       }
+       ret[i] = sc
+   }
+   return ret
+}
+
+func main() {
+   log.SetFlags(0)
+   log.SetPrefix("mkmalloc: ")
+
+   classes := makeClasses()
+   sizeToSizeClass := makeSizeToSizeClass(classes)
+
+   if *stdout {
+       if _, err := os.Stdout.Write(mustFormat(generateSizeClasses(classes))); err != nil {
+           log.Fatal(err)
+       }
+       return
+   }
+
+   sizeclasesesfile := "../../internal/runtime/gc/sizeclasses.go"
+   if err := os.WriteFile(sizeclasesesfile, mustFormat(generateSizeClasses(classes)), 0666); err != nil {
+       log.Fatal(err)
+   }
+
+   outfile := "../malloc_generated.go"
+   if err := os.WriteFile(outfile, mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass))), 0666); err != nil {
+       log.Fatal(err)
+   }
+
+   tablefile := "../malloc_tables_generated.go"
+   if err := os.WriteFile(tablefile, mustFormat(generateTable(sizeToSizeClass)), 0666); err != nil {
+       log.Fatal(err)
+   }
+}
+
+// withLineNumbers returns b with line numbers added to help debugging.
+func withLineNumbers(b []byte) []byte {
+   var buf bytes.Buffer
+   i := 1
+   for line := range bytes.Lines(b) {
+       fmt.Fprintf(&buf, "%d: %s", i, line)
+       i++
+   }
+   return buf.Bytes()
+}
+
+// mustFormat formats the input source, or exits if there's an error.
+func mustFormat(b []byte) []byte {
+   formatted, err := format.Source(b)
+   if err != nil {
+       log.Fatalf("error formatting source: %v\nsource:\n%s\n", err, withLineNumbers(b))
+   }
+   return formatted
+}
+
+// generatorConfig is the configuration for the generator. It uses the given file to find
+// its templates, and generates each of the functions specified by specs.
+type generatorConfig struct {
+   file  string
+   specs []spec
+}
+
+// spec is the specification for a function for the inliner to produce. The function gets
+// the given name, and is produced by starting with the function with the name given by
+// templateFunc and applying each of the ops.
+type spec struct {
+   name         string
+   templateFunc string
+   ops          []op
+}
+
+// replacementKind specifies the operation to be done by an op.
+type replacementKind int
+
+const (
+   inlineFunc = replacementKind(iota)
+   subBasicLit
+)
+
+// op is a single inlining operation for the inliner. Any calls to the function
+// 'from' are replaced with the inlined body of 'to'. For non-functions, uses of
+// 'from' are replaced with the basic literal expression given by 'to'.
+type op struct {
+   kind replacementKind
+   from string
+   to   string
+}
+
+func smallScanNoHeaderSCFuncName(sc, scMax uint8) string {
+   if sc == 0 || sc > scMax {
+       return "mallocPanic"
+   }
+   return fmt.Sprintf("mallocgcSmallScanNoHeaderSC%d", sc)
+}
+
+func tinyFuncName(size uintptr) string {
+   if size == 0 || size > smallScanNoHeaderMax {
+       return "mallocPanic"
+   }
+   return fmt.Sprintf("mallocTiny%d", size)
+}
+
+func smallNoScanSCFuncName(sc, scMax uint8) string {
+   if sc < 2 || sc > scMax {
+       return "mallocPanic"
+   }
+   return fmt.Sprintf("mallocgcSmallNoScanSC%d", sc)
+}
+
+// specializedMallocConfig produces an inlining config to stamp out the definitions of the size-specialized
+// malloc functions to be written by mkmalloc.
+func specializedMallocConfig(classes []class, sizeToSizeClass []uint8) generatorConfig {
+   config := generatorConfig{file: "../malloc_stubs.go"}
+
+   // Only generate specialized functions for sizes that don't have
+   // a header on 64-bit platforms. (They may have a header on 32-bit, but
+   // we will fall back to the non-specialized versions in that case.)
+   scMax := sizeToSizeClass[smallScanNoHeaderMax]
+
+   str := fmt.Sprint
+
+   // allocations with pointer bits
+   {
+       const noscan = 0
+       for sc := uint8(0); sc <= scMax; sc++ {
+           if sc == 0 {
+               continue
+           }
+           name := smallScanNoHeaderSCFuncName(sc, scMax)
+           elemsize := classes[sc].size
+           config.specs = append(config.specs, spec{
+               templateFunc: "mallocStub",
+               name:         name,
+               ops: []op{
+                   {inlineFunc, "inlinedMalloc", "smallScanNoHeaderStub"},
+                   {inlineFunc, "heapSetTypeNoHeaderStub", "heapSetTypeNoHeaderStub"},
+                   {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
+                   {inlineFunc, "writeHeapBitsSmallStub", "writeHeapBitsSmallStub"},
+                   {subBasicLit, "elemsize_", str(elemsize)},
+                   {subBasicLit, "sizeclass_", str(sc)},
+                   {subBasicLit, "noscanint_", str(noscan)},
+               },
+           })
+       }
+   }
+
+   // allocations without pointer bits
+   {
+       const noscan = 1
+
+       // tiny
+       tinySizeClass := sizeToSizeClass[tinySize]
+       for s := range uintptr(16) {
+           if s == 0 {
+               continue
+           }
+           name := tinyFuncName(s)
+           elemsize := classes[tinySizeClass].size
+           config.specs = append(config.specs, spec{
+               templateFunc: "mallocStub",
+               name:         name,
+               ops: []op{
+                   {inlineFunc, "inlinedMalloc", "tinyStub"},
+                   {inlineFunc, "nextFreeFastTiny", "nextFreeFastTiny"},
+                   {subBasicLit, "elemsize_", str(elemsize)},
+                   {subBasicLit, "sizeclass_", str(tinySizeClass)},
+                   {subBasicLit, "size_", str(s)},
+                   {subBasicLit, "noscanint_", str(noscan)},
+               },
+           })
+       }
+
+       // non-tiny
+       for sc := uint8(tinySizeClass); sc <= scMax; sc++ {
+           name := smallNoScanSCFuncName(sc, scMax)
+           elemsize := classes[sc].size
+           config.specs = append(config.specs, spec{
+               templateFunc: "mallocStub",
+               name:         name,
+               ops: []op{
+                   {inlineFunc, "inlinedMalloc", "smallNoScanStub"},
+                   {inlineFunc, "nextFreeFastStub", "nextFreeFastStub"},
+                   {subBasicLit, "elemsize_", str(elemsize)},
+                   {subBasicLit, "sizeclass_", str(sc)},
+                   {subBasicLit, "noscanint_", str(noscan)},
+               },
+           })
+       }
+   }
+
+   return config
+}
+
+// inline applies the inlining operations given by the config.
+func inline(config generatorConfig) []byte {
+   var out bytes.Buffer
+
+   // Read the template file in.
+   fset := token.NewFileSet()
+   f, err := parser.ParseFile(fset, config.file, nil, 0)
+   if err != nil {
+       log.Fatalf("parsing %s: %v", config.file, err)
+   }
+
+   // Collect the function and import declarations. The function
+   // declarations in the template file provide both the templates
+   // that will be stamped out, and the functions that will be inlined
+   // into them. The imports from the template file will be copied
+   // straight to the output.
+   funcDecls := map[string]*ast.FuncDecl{}
+   importDecls := []*ast.GenDecl{}
+   for _, decl := range f.Decls {
+       switch decl := decl.(type) {
+       case *ast.FuncDecl:
+           funcDecls[decl.Name.Name] = decl
+       case *ast.GenDecl:
+           if decl.Tok.String() == "import" {
+               importDecls = append(importDecls, decl)
+               continue
+           }
+       }
+   }
+
+   // Write out the package and import declarations.
+   out.WriteString("// Code generated by mkmalloc.go; DO NOT EDIT.\n\n")
+   out.WriteString("package " + f.Name.Name + "\n\n")
+   for _, importDecl := range importDecls {
+       out.Write(mustFormatNode(fset, importDecl))
+       out.WriteString("\n\n")
+   }
+
+   // Produce each of the inlined functions specified by specs.
+   for _, spec := range config.specs {
+       // Start with a renamed copy of the template function.
+       containingFuncCopy := internalastutil.CloneNode(funcDecls[spec.templateFunc])
+       if containingFuncCopy == nil {
+           log.Fatal("did not find ", spec.templateFunc)
+       }
+       containingFuncCopy.Name.Name = spec.name
+
+       // Apply each of the ops given by the specs.
+       stamped := ast.Node(containingFuncCopy)
+       for _, repl := range spec.ops {
+           if toDecl, ok := funcDecls[repl.to]; ok {
+               stamped = inlineFunction(stamped, repl.from, toDecl)
+           } else {
+               stamped = substituteWithBasicLit(stamped, repl.from, repl.to)
+           }
+       }
+
+       out.Write(mustFormatNode(fset, stamped))
+       out.WriteString("\n\n")
+   }
+
+   return out.Bytes()
+}
+
+// substituteWithBasicLit recursively renames identifiers in the provided AST
+// according to 'from' and 'to'.
+func substituteWithBasicLit(node ast.Node, from, to string) ast.Node {
+   // The op is a substitution of an identifier with a basic literal.
+   toExpr, err := parser.ParseExpr(to)
+   if err != nil {
+       log.Fatalf("parsing expr %q: %v", to, err)
+   }
+   if _, ok := toExpr.(*ast.BasicLit); !ok {
+       log.Fatalf("op 'to' expr %q is not a basic literal", to)
+   }
+   return astutil.Apply(node, func(cursor *astutil.Cursor) bool {
+       if isIdentWithName(cursor.Node(), from) {
+           cursor.Replace(toExpr)
+       }
+       return true
+   }, nil)
+}
+
+// inlineFunction recursively replaces calls to the function 'from' with the body of the function
+// 'toDecl'. All calls to 'from' must appear in assignment statements.
+// The replacement is very simple: it doesn't substitute the arguments for the parameters, so the
+// arguments to the function call must be the same identifiers as the parameters of the function
+// declared by 'toDecl'. If there are any calls to 'from' where that's not the case, there will be a fatal error.
+func inlineFunction(node ast.Node, from string, toDecl *ast.FuncDecl) ast.Node {
+   return astutil.Apply(node, func(cursor *astutil.Cursor) bool {
+       switch node := cursor.Node().(type) {
+       case *ast.AssignStmt:
+           // TODO(matloob): check that the function args have the same names
+           // as the parameters (or the parameter is "_").
+           if len(node.Rhs) == 1 && isCallTo(node.Rhs[0], from) {
+               args := node.Rhs[0].(*ast.CallExpr).Args
+               if !argsMatchParameters(args, toDecl.Type.Params) {
+                   log.Fatalf("applying op: arguments to %v don't match parameter names of %v: %v", from, toDecl.Name, debugPrint(args...))
+               }
+               replaceAssignment(cursor, node, toDecl)
+           }
+           return false
+       case *ast.CallExpr:
+           // Double-check that all calls to 'from' appear within an assignment.
+           if isCallTo(node, from) {
+               if _, ok := cursor.Parent().(*ast.AssignStmt); !ok {
+                   log.Fatalf("applying op: all calls to function %q being replaced must appear in an assignment statement, appears in %T", from, cursor.Parent())
+               }
+           }
+       }
+       return true
+   }, nil)
+}
+
+// argsMatchParameters reports whether the arguments given by args are all identifiers
+// whose names are the same as the corresponding parameters in params.
+func argsMatchParameters(args []ast.Expr, params *ast.FieldList) bool {
+   var paramIdents []*ast.Ident
+   for _, f := range params.List {
+       paramIdents = append(paramIdents, f.Names...)
+   }
+
+   if len(args) != len(paramIdents) {
+       return false
+   }
+
+   for i := range args {
+       if !isIdentWithName(args[i], paramIdents[i].Name) {
+           return false
+       }
+   }
+
+   return true
+}
+
+// isIdentWithName reports whether the expression is an identifier with the given name.
+func isIdentWithName(expr ast.Node, name string) bool {
+   ident, ok := expr.(*ast.Ident)
+   if !ok {
+       return false
+   }
+   return ident.Name == name
+}
+
+// isCallTo reports whether the expression is a call expression to the function with the given name.
+func isCallTo(expr ast.Expr, name string) bool {
+   callexpr, ok := expr.(*ast.CallExpr)
+   if !ok {
+       return false
+   }
+   return isIdentWithName(callexpr.Fun, name)
+}
+
+// replaceAssignment replaces an assignment statement where the right-hand side is a function call
+// whose arguments have the same names as the parameters to funcdecl with the body of funcdecl.
+// It sets the left-hand side of the assignment to the return values of the function.
+func replaceAssignment(cursor *astutil.Cursor, assign *ast.AssignStmt, funcdecl *ast.FuncDecl) {
+   if !hasTerminatingReturn(funcdecl.Body) {
+       log.Fatal("function being inlined must have a return at the end")
+   }
+
+   body := internalastutil.CloneNode(funcdecl.Body)
+   if hasTerminatingAndNonterminatingReturn(funcdecl.Body) {
+       // The function has multiple return points. Add the code that we'd continue with in the caller
+       // after each of the return points. The calling function must have a terminating return,
+       // so we don't continue execution in the replaced function after we finish executing the
+       // continue block that we add.
+       body = addContinues(cursor, assign, body, everythingFollowingInParent(cursor)).(*ast.BlockStmt)
+   }
+
+   if len(body.List) < 1 {
+       log.Fatal("replacing with empty-bodied function")
+   }
+
+   // The op happens in two steps: first we insert the body of the function being inlined (except for
+   // the final return) before the assignment, and then we change the assignment statement to replace the function call
+   // with the expressions being returned.
+
+   // Determine the expressions being returned.
+   beforeReturn, ret := body.List[:len(body.List)-1], body.List[len(body.List)-1]
+   returnStmt, ok := ret.(*ast.ReturnStmt)
+   if !ok {
+       log.Fatal("last stmt in function we're replacing with should be a return")
+   }
+   results := returnStmt.Results
+
+   // Insert the body up to the final return.
+   for _, stmt := range beforeReturn {
+       cursor.InsertBefore(stmt)
+   }
+
+   // Rewrite the assignment statement.
+   replaceWithAssignment(cursor, assign.Lhs, results, assign.Tok)
+}
+
+// hasTerminatingReturn reports whether the block ends in a return statement.
+func hasTerminatingReturn(block *ast.BlockStmt) bool {
+   _, ok := block.List[len(block.List)-1].(*ast.ReturnStmt)
+   return ok
+}
+
+// hasTerminatingAndNonterminatingReturn reports whether the block ends in a return
+// statement, and also has a return elsewhere in it.
+func hasTerminatingAndNonterminatingReturn(block *ast.BlockStmt) bool {
+   if !hasTerminatingReturn(block) {
+       return false
+   }
+   var ret bool
+   for i := range block.List[:len(block.List)-1] {
+       ast.Inspect(block.List[i], func(node ast.Node) bool {
+           _, ok := node.(*ast.ReturnStmt)
+           if ok {
+               ret = true
+               return false
+           }
+           return true
+       })
+   }
+   return ret
+}
+
+// everythingFollowingInParent returns a block with everything in the parent block node of the cursor after
+// the cursor itself. The cursor must point to an element in a block node's list.
+func everythingFollowingInParent(cursor *astutil.Cursor) *ast.BlockStmt {
+   parent := cursor.Parent()
+   block, ok := parent.(*ast.BlockStmt)
+   if !ok {
+       log.Fatal("internal error: in everythingFollowingInParent, cursor doesn't point to element in block list")
+   }
+
+   blockcopy := internalastutil.CloneNode(block)      // get a clean copy
+   blockcopy.List = blockcopy.List[cursor.Index()+1:] // and remove everything before and including stmt
+
+   if _, ok := blockcopy.List[len(blockcopy.List)-1].(*ast.ReturnStmt); !ok {
+       log.Printf("%s", mustFormatNode(token.NewFileSet(), blockcopy))
+       log.Fatal("internal error: parent doesn't end in a return")
+   }
+   return blockcopy
+}
+
+// In the case that there's a return in the body being inlined (toBlock), addContinues
+// replaces those returns that are not at the end of the function with the code in the
+// caller after the function call that execution would continue with after the return.
+// The block being added must end in a return.
+func addContinues(cursor *astutil.Cursor, assignNode *ast.AssignStmt, toBlock *ast.BlockStmt, continueBlock *ast.BlockStmt) ast.Node {
+   if !hasTerminatingReturn(continueBlock) {
+       log.Fatal("the block being continued to in addContinues must end in a return")
+   }
+   applyFunc := func(cursor *astutil.Cursor) bool {
+       ret, ok := cursor.Node().(*ast.ReturnStmt)
+       if !ok {
+           return true
+       }
+
+       if cursor.Parent() == toBlock && cursor.Index() == len(toBlock.List)-1 {
+           return false
+       }
+
+       // This is the opposite of replacing a function call with the body. First
+       // we replace the return statement with the assignment from the caller, and
+       // then add the code we continue with.
+       replaceWithAssignment(cursor, assignNode.Lhs, ret.Results, assignNode.Tok)
+       cursor.InsertAfter(internalastutil.CloneNode(continueBlock))
+
+       return false
+   }
+   return astutil.Apply(toBlock, applyFunc, nil)
+}
+
+// debugPrint prints out the expressions given by nodes for debugging.
+func debugPrint(nodes ...ast.Expr) string {
+   var b strings.Builder
+   for i, node := range nodes {
+       b.Write(mustFormatNode(token.NewFileSet(), node))
+       if i != len(nodes)-1 {
+           b.WriteString(", ")
+       }
+   }
+   return b.String()
+}
+
+// mustFormatNode produces the formatted Go code for the given node.
+func mustFormatNode(fset *token.FileSet, node any) []byte {
+   var buf bytes.Buffer
+   format.Node(&buf, fset, node)
+   return buf.Bytes()
+}
+
+// mustMatchExprs makes sure that the expression lists have the same length,
+// and returns the lists of the expressions on the lhs and rhs where the
+// identifiers are not the same. These are used to produce assignment statements
+// where the expressions on the right are assigned to the identifiers on the left.
+func mustMatchExprs(lhs []ast.Expr, rhs []ast.Expr) ([]ast.Expr, []ast.Expr) {
+   if len(lhs) != len(rhs) {
+       log.Fatal("exprs don't match: ", debugPrint(lhs...), debugPrint(rhs...))
+   }
+
+   var newLhs, newRhs []ast.Expr
+   for i := range lhs {
+       lhsIdent, ok1 := lhs[i].(*ast.Ident)
+       rhsIdent, ok2 := rhs[i].(*ast.Ident)
+       if ok1 && ok2 && lhsIdent.Name == rhsIdent.Name {
+           continue
+       }
+       newLhs = append(newLhs, lhs[i])
+       newRhs = append(newRhs, rhs[i])
+   }
+
+   return newLhs, newRhs
+}
+
+// replaceWithAssignment replaces the node pointed to by the cursor with an assignment of the
+// left-hand side to the right-hand side, removing any redundant assignments of a variable to itself,
+// and replacing an assignment to a single basic literal with a constant declaration.
+func replaceWithAssignment(cursor *astutil.Cursor, lhs, rhs []ast.Expr, tok token.Token) {
+   newLhs, newRhs := mustMatchExprs(lhs, rhs)
+   if len(newLhs) == 0 {
+       cursor.Delete()
+       return
+   }
+   if len(newRhs) == 1 {
+       if lit, ok := newRhs[0].(*ast.BasicLit); ok {
+           constDecl := &ast.DeclStmt{
+               Decl: &ast.GenDecl{
+                   Tok: token.CONST,
+                   Specs: []ast.Spec{
+                       &ast.ValueSpec{
+                           Names:  []*ast.Ident{newLhs[0].(*ast.Ident)},
+                           Values: []ast.Expr{lit},
+                       },
+                   },
+               },
+           }
+           cursor.Replace(constDecl)
+           return
+       }
+   }
+   newAssignment := &ast.AssignStmt{
+       Lhs: newLhs,
+       Rhs: newRhs,
+       Tok: tok,
+   }
+   cursor.Replace(newAssignment)
+}
+
+// generateTable generates the file with the jump tables for the specialized malloc functions.
+func generateTable(sizeToSizeClass []uint8) []byte {
+   scMax := sizeToSizeClass[smallScanNoHeaderMax]
+
+   var b bytes.Buffer
+   fmt.Fprintln(&b, `// Code generated by mkmalloc.go; DO NOT EDIT.
+
+//go:build !plan9
+
+package runtime
+
+import "unsafe"
+
+var mallocScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
+
+   for i := range uintptr(smallScanNoHeaderMax + 1) {
+       fmt.Fprintf(&b, "%s,\n", smallScanNoHeaderSCFuncName(sizeToSizeClass[i], scMax))
+   }
+
+   fmt.Fprintln(&b, `
+}
+
+var mallocNoScanTable = [513]func(size uintptr, typ *_type, needzero bool) unsafe.Pointer{`)
+   for i := range uintptr(smallScanNoHeaderMax + 1) {
+       if i < 16 {
+           fmt.Fprintf(&b, "%s,\n", tinyFuncName(i))
+       } else {
+           fmt.Fprintf(&b, "%s,\n", smallNoScanSCFuncName(sizeToSizeClass[i], scMax))
+       }
+   }
+
+   fmt.Fprintln(&b, `
+}`)
+
+   return b.Bytes()
+}
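
The generator's core mechanic is AST surgery with x/tools' astutil. A reduced sketch of the substituteWithBasicLit step above: parse a template, then walk it and replace a placeholder identifier (elemsize_, one of the names the diff actually uses) with a literal. The template source here is a made-up example:

package main

import (
    "go/ast"
    "go/format"
    "go/parser"
    "go/token"
    "os"

    "golang.org/x/tools/go/ast/astutil"
)

func main() {
    src := `package p

func alloc() uintptr { return elemsize_ }
`
    fset := token.NewFileSet()
    f, err := parser.ParseFile(fset, "tmpl.go", src, 0)
    if err != nil {
        panic(err)
    }
    // Replace every use of the placeholder identifier with a basic
    // literal, the way mkmalloc stamps elemsize_, sizeclass_, etc.
    astutil.Apply(f, func(c *astutil.Cursor) bool {
        if id, ok := c.Node().(*ast.Ident); ok && id.Name == "elemsize_" {
            c.Replace(&ast.BasicLit{Kind: token.INT, Value: "48"})
        }
        return true
    }, nil)
    format.Node(os.Stdout, fset, f) // prints: func alloc() uintptr { return 48 }
}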

src/runtime/_mkmalloc/mkmalloc_test.go (new file)
@@ -0,0 +1,36 @@
+// Copyright 2025 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import (
+   "bytes"
+   "os"
+   "testing"
+)
+
+func TestNoChange(t *testing.T) {
+   classes := makeClasses()
+   sizeToSizeClass := makeSizeToSizeClass(classes)
+
+   outfile := "../malloc_generated.go"
+   want, err := os.ReadFile(outfile)
+   if err != nil {
+       t.Fatal(err)
+   }
+   got := mustFormat(inline(specializedMallocConfig(classes, sizeToSizeClass)))
+   if !bytes.Equal(want, got) {
+       t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(want), withLineNumbers(got))
+   }
+
+   tablefile := "../malloc_tables_generated.go"
+   wanttable, err := os.ReadFile(tablefile)
+   if err != nil {
+       t.Fatal(err)
+   }
+   gotTable := mustFormat(generateTable(sizeToSizeClass))
+   if !bytes.Equal(wanttable, gotTable) {
+       t.Fatalf("want:\n%s\ngot:\n%s\n", withLineNumbers(wanttable), withLineNumbers(gotTable))
+   }
+}

@@ -31,19 +31,14 @@ import (
    "bytes"
    "flag"
    "fmt"
-   "go/format"
    "io"
-   "log"
    "math"
    "math/bits"
-   "os"
 )

 // Generate internal/runtime/gc/msize.go

-var stdout = flag.Bool("stdout", false, "write to stdout instead of sizeclasses.go")
-
-func main() {
+func generateSizeClasses(classes []class) []byte {
    flag.Parse()

    var b bytes.Buffer

@@ -51,39 +46,14 @@ func main() {
    fmt.Fprintln(&b, "//go:generate go -C ../../../runtime/_mkmalloc run mksizeclasses.go")
    fmt.Fprintln(&b)
    fmt.Fprintln(&b, "package gc")
-   classes := makeClasses()

    printComment(&b, classes)

    printClasses(&b, classes)

-   out, err := format.Source(b.Bytes())
-   if err != nil {
-       log.Fatal(err)
-   }
-   if *stdout {
-       _, err = os.Stdout.Write(out)
-   } else {
-       err = os.WriteFile("../../internal/runtime/gc/sizeclasses.go", out, 0666)
-   }
-   if err != nil {
-       log.Fatal(err)
-   }
+   return b.Bytes()
 }

-const (
-   // Constants that we use and will transfer to the runtime.
-   minHeapAlign = 8
-   maxSmallSize = 32 << 10
-   smallSizeDiv = 8
-   smallSizeMax = 1024
-   largeSizeDiv = 128
-   pageShift    = 13
-
-   // Derived constants.
-   pageSize = 1 << pageShift
-)
-
 type class struct {
    size   int // max size
    npages int // number of pages

@@ -294,6 +264,15 @@ func maxNPages(classes []class) int {
 }

 func printClasses(w io.Writer, classes []class) {
+   sizeToSizeClass := func(size int) int {
+       for j, c := range classes {
+           if c.size >= size {
+               return j
+           }
+       }
+       panic("unreachable")
+   }
+
    fmt.Fprintln(w, "const (")
    fmt.Fprintf(w, "MinHeapAlign = %d\n", minHeapAlign)
    fmt.Fprintf(w, "MaxSmallSize = %d\n", maxSmallSize)

@@ -304,6 +283,8 @@ func printClasses(w io.Writer, classes []class) {
    fmt.Fprintf(w, "PageShift = %d\n", pageShift)
    fmt.Fprintf(w, "MaxObjsPerSpan = %d\n", maxObjsPerSpan(classes))
    fmt.Fprintf(w, "MaxSizeClassNPages = %d\n", maxNPages(classes))
+   fmt.Fprintf(w, "TinySize = %d\n", tinySize)
+   fmt.Fprintf(w, "TinySizeClass = %d\n", sizeToSizeClass(tinySize))
    fmt.Fprintln(w, ")")

    fmt.Fprint(w, "var SizeClassToSize = [NumSizeClasses]uint16 {")

@@ -332,12 +313,7 @@ func printClasses(w io.Writer, classes []class) {
    sc := make([]int, smallSizeMax/smallSizeDiv+1)
    for i := range sc {
        size := i * smallSizeDiv
-       for j, c := range classes {
-           if c.size >= size {
-               sc[i] = j
-               break
-           }
-       }
+       sc[i] = sizeToSizeClass(size)
    }
    fmt.Fprint(w, "var SizeToSizeClass8 = [SmallSizeMax/SmallSizeDiv+1]uint8 {")
    for _, v := range sc {

@@ -349,12 +325,7 @@ func printClasses(w io.Writer, classes []class) {
    sc = make([]int, (maxSmallSize-smallSizeMax)/largeSizeDiv+1)
    for i := range sc {
        size := smallSizeMax + i*largeSizeDiv
-       for j, c := range classes {
-           if c.size >= size {
-               sc[i] = j
-               break
-           }
-       }
+       sc[i] = sizeToSizeClass(size)
    }
    fmt.Fprint(w, "var SizeToSizeClass128 = [(MaxSmallSize-SmallSizeMax)/LargeSizeDiv+1]uint8 {")
    for _, v := range sc {
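
The factored-out sizeToSizeClass helper above maps a request size to the first size class large enough to hold it. A standalone illustration of that lookup (classSizes here is just a prefix of the real table, assumed for the example):

package main

import "fmt"

func main() {
    classSizes := []int{0, 8, 16, 24, 32, 48}
    sizeToSizeClass := func(size int) int {
        for j, c := range classSizes {
            if c >= size {
                return j // first class whose max size covers the request
            }
        }
        panic("unreachable")
    }
    fmt.Println(sizeToSizeClass(9))  // 2: a 9-byte request rounds up to 16
    fmt.Println(sizeToSizeClass(16)) // 2: exact fit
    fmt.Println(sizeToSizeClass(17)) // 3: rounds up to 24
}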

@@ -15,7 +15,9 @@ import "unsafe"
 //go:linkname _cgo_sys_thread_create _cgo_sys_thread_create
 //go:linkname _cgo_notify_runtime_init_done _cgo_notify_runtime_init_done
 //go:linkname _cgo_callers _cgo_callers
-//go:linkname _cgo_set_context_function _cgo_set_context_function
+//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions
+//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function
+//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function
 //go:linkname _cgo_yield _cgo_yield
 //go:linkname _cgo_pthread_key_created _cgo_pthread_key_created
 //go:linkname _cgo_bindm _cgo_bindm

@@ -27,7 +29,9 @@ var (
    _cgo_sys_thread_create        unsafe.Pointer
    _cgo_notify_runtime_init_done unsafe.Pointer
    _cgo_callers                  unsafe.Pointer
-   _cgo_set_context_function     unsafe.Pointer
+   _cgo_set_traceback_functions  unsafe.Pointer
+   _cgo_call_traceback_function  unsafe.Pointer
+   _cgo_call_symbolizer_function unsafe.Pointer
    _cgo_yield                    unsafe.Pointer
    _cgo_pthread_key_created      unsafe.Pointer
    _cgo_bindm                    unsafe.Pointer
@@ -121,13 +121,30 @@ var _cgo_bindm = &x_cgo_bindm
var x_cgo_notify_runtime_init_done byte
var _cgo_notify_runtime_init_done = &x_cgo_notify_runtime_init_done

-// Sets the traceback context function. See runtime.SetCgoTraceback.
+// Sets the traceback, context, and symbolizer functions. See
+// runtime.SetCgoTraceback.

-//go:cgo_import_static x_cgo_set_context_function
-//go:linkname x_cgo_set_context_function x_cgo_set_context_function
-//go:linkname _cgo_set_context_function _cgo_set_context_function
-var x_cgo_set_context_function byte
-var _cgo_set_context_function = &x_cgo_set_context_function
+//go:cgo_import_static x_cgo_set_traceback_functions
+//go:linkname x_cgo_set_traceback_functions x_cgo_set_traceback_functions
+//go:linkname _cgo_set_traceback_functions _cgo_set_traceback_functions
+var x_cgo_set_traceback_functions byte
+var _cgo_set_traceback_functions = &x_cgo_set_traceback_functions
+
+// Call the traceback function registered with x_cgo_set_traceback_functions.
+
+//go:cgo_import_static x_cgo_call_traceback_function
+//go:linkname x_cgo_call_traceback_function x_cgo_call_traceback_function
+//go:linkname _cgo_call_traceback_function _cgo_call_traceback_function
+var x_cgo_call_traceback_function byte
+var _cgo_call_traceback_function = &x_cgo_call_traceback_function
+
+// Call the symbolizer function registered with x_cgo_set_symbolizer_functions.
+
+//go:cgo_import_static x_cgo_call_symbolizer_function
+//go:linkname x_cgo_call_symbolizer_function x_cgo_call_symbolizer_function
+//go:linkname _cgo_call_symbolizer_function _cgo_call_symbolizer_function
+var x_cgo_call_symbolizer_function byte
+var _cgo_call_symbolizer_function = &x_cgo_call_symbolizer_function

// Calls a libc function to execute background work injected via libc
// interceptors, such as processing pending signals under the thread
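For orientation, the three hooks wired up above are the ones runtime.SetCgoTraceback accepts. A minimal sketch of registering them from a cgo program — the no-op C callbacks here are hypothetical placeholders, not part of this change; only runtime.SetCgoTraceback and its version-0 protocol come from the standard library:

	package main

	/*
	// Hypothetical no-op callbacks. The real ones would match the
	// cgoTracebackArg, cgoContextArg, and cgoSymbolizerArg layouts
	// declared in libcgo.h below.
	void myTraceback(void* arg) {}
	void myContext(void* arg) {}
	void mySymbolizer(void* arg) {}
	*/
	import "C"

	import (
		"runtime"
		"unsafe"
	)

	func init() {
		// Version 0 of the SetCgoTraceback protocol. After this change, the
		// runtime hands all three pointers to the C side in one call to
		// x_cgo_set_traceback_functions.
		runtime.SetCgoTraceback(0, unsafe.Pointer(C.myTraceback),
			unsafe.Pointer(C.myContext), unsafe.Pointer(C.mySymbolizer))
	}

	func main() {}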
@@ -8,11 +8,11 @@

// Releases the cgo traceback context.
void _cgo_release_context(uintptr_t ctxt) {
-	void (*pfn)(struct context_arg*);
+	void (*pfn)(struct cgoContextArg*);

	pfn = _cgo_get_context_function();
	if (ctxt != 0 && pfn != nil) {
-		struct context_arg arg;
+		struct cgoContextArg arg;

		arg.Context = ctxt;
		(*pfn)(&arg);
@@ -32,8 +32,14 @@ static void pthread_key_destructor(void* g);
uintptr_t x_cgo_pthread_key_created;
void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t);

+// The traceback function, used when tracing C calls.
+static void (*cgo_traceback_function)(struct cgoTracebackArg*);
+
// The context function, used when tracing back C calls into Go.
-static void (*cgo_context_function)(struct context_arg*);
+static void (*cgo_context_function)(struct cgoContextArg*);
+
+// The symbolizer function, used when symbolizing C frames.
+static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*);

void
x_cgo_sys_thread_create(void* (*func)(void*), void* arg) {

@@ -52,7 +58,7 @@ x_cgo_sys_thread_create(void* (*func)(void*), void* arg) {

uintptr_t
_cgo_wait_runtime_init_done(void) {
-	void (*pfn)(struct context_arg*);
+	void (*pfn)(struct cgoContextArg*);
	int done;

	pfn = __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME);

@@ -70,7 +76,6 @@ _cgo_wait_runtime_init_done(void) {
		x_cgo_pthread_key_created = 1;
	}

-
	// TODO(iant): For the case of a new C thread calling into Go, such
	// as when using -buildmode=c-archive, we know that Go runtime
	// initialization is complete but we do not know that all Go init

@@ -87,7 +92,7 @@ _cgo_wait_runtime_init_done(void) {
	}

	if (pfn != nil) {
-		struct context_arg arg;
+		struct cgoContextArg arg;

		arg.Context = 0;
		(*pfn)(&arg);

@@ -138,17 +143,71 @@ x_cgo_notify_runtime_init_done(void* dummy __attribute__ ((unused))) {
	pthread_mutex_unlock(&runtime_init_mu);
}

-// Sets the context function to call to record the traceback context
-// when calling a Go function from C code. Called from runtime.SetCgoTraceback.
-void x_cgo_set_context_function(void (*context)(struct context_arg*)) {
-	__atomic_store_n(&cgo_context_function, context, __ATOMIC_RELEASE);
+// Sets the traceback, context, and symbolizer functions. Called from
+// runtime.SetCgoTraceback.
+void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) {
+	__atomic_store_n(&cgo_traceback_function, arg->Traceback, __ATOMIC_RELEASE);
+	__atomic_store_n(&cgo_context_function, arg->Context, __ATOMIC_RELEASE);
+	__atomic_store_n(&cgo_symbolizer_function, arg->Symbolizer, __ATOMIC_RELEASE);
}

-// Gets the context function.
-void (*(_cgo_get_context_function(void)))(struct context_arg*) {
+// Gets the traceback function to call to trace C calls.
+void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) {
+	return __atomic_load_n(&cgo_traceback_function, __ATOMIC_CONSUME);
+}
+
+// Call the traceback function registered with x_cgo_set_traceback_functions.
+//
+// The traceback function is an arbitrary user C function which may be built
+// with TSAN, and thus must be wrapped with TSAN acquire/release calls. For
+// normal cgo calls, cmd/cgo automatically inserts TSAN acquire/release calls.
+// Since the traceback, context, and symbolizer functions are registered at
+// startup and called via the runtime, they do not get automatic TSAN
+// acquire/release calls.
+//
+// The only purpose of this wrapper is to perform TSAN acquire/release.
+// Alternatively, if the runtime arranged to safely call TSAN acquire/release,
+// it could perform the call directly.
+void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) {
+	void (*pfn)(struct cgoTracebackArg*);
+
+	pfn = _cgo_get_traceback_function();
+	if (pfn == nil) {
+		return;
+	}
+
+	_cgo_tsan_acquire();
+	(*pfn)(arg);
+	_cgo_tsan_release();
+}
+
+// Gets the context function to call to record the traceback context
+// when calling a Go function from C code.
+void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) {
	return __atomic_load_n(&cgo_context_function, __ATOMIC_CONSUME);
}

+// Gets the symbolizer function to call to symbolize C frames.
+void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) {
+	return __atomic_load_n(&cgo_symbolizer_function, __ATOMIC_CONSUME);
+}
+
+// Call the symbolizer function registered with x_cgo_set_traceback_functions.
+//
+// See comment on x_cgo_call_traceback_function.
+void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) {
+	void (*pfn)(struct cgoSymbolizerArg*);
+
+	pfn = _cgo_get_symbolizer_function();
+	if (pfn == nil) {
+		return;
+	}
+
+	_cgo_tsan_acquire();
+	(*pfn)(arg);
+	_cgo_tsan_release();
+}
+
// _cgo_try_pthread_create retries pthread_create if it fails with
// EAGAIN.
int
@@ -32,6 +32,7 @@ static CRITICAL_SECTION runtime_init_cs;
static HANDLE runtime_init_wait;
static int runtime_init_done;

+// No pthreads on Windows, these are always zero.
uintptr_t x_cgo_pthread_key_created;
void (*x_crosscall2_ptr)(void (*fn)(void *), void *, int, size_t);


@@ -81,7 +82,7 @@ _cgo_is_runtime_initialized() {

uintptr_t
_cgo_wait_runtime_init_done(void) {
-	void (*pfn)(struct context_arg*);
+	void (*pfn)(struct cgoContextArg*);

	_cgo_maybe_run_preinit();
	while (!_cgo_is_runtime_initialized()) {

@@ -89,7 +90,7 @@ _cgo_wait_runtime_init_done(void) {
	}
	pfn = _cgo_get_context_function();
	if (pfn != nil) {
-		struct context_arg arg;
+		struct cgoContextArg arg;

		arg.Context = 0;
		(*pfn)(&arg);

@@ -118,20 +119,54 @@ x_cgo_notify_runtime_init_done(void* dummy) {
	}
}

-// The context function, used when tracing back C calls into Go.
-static void (*cgo_context_function)(struct context_arg*);
+// The traceback function, used when tracing C calls.
+static void (*cgo_traceback_function)(struct cgoTracebackArg*);

-// Sets the context function to call to record the traceback context
-// when calling a Go function from C code. Called from runtime.SetCgoTraceback.
-void x_cgo_set_context_function(void (*context)(struct context_arg*)) {
+// The context function, used when tracing back C calls into Go.
+static void (*cgo_context_function)(struct cgoContextArg*);
+
+// The symbolizer function, used when symbolizing C frames.
+static void (*cgo_symbolizer_function)(struct cgoSymbolizerArg*);
+
+// Sets the traceback, context, and symbolizer functions. Called from
+// runtime.SetCgoTraceback.
+void x_cgo_set_traceback_functions(struct cgoSetTracebackFunctionsArg* arg) {
	EnterCriticalSection(&runtime_init_cs);
-	cgo_context_function = context;
+	cgo_traceback_function = arg->Traceback;
+	cgo_context_function = arg->Context;
+	cgo_symbolizer_function = arg->Symbolizer;
	LeaveCriticalSection(&runtime_init_cs);
}

-// Gets the context function.
-void (*(_cgo_get_context_function(void)))(struct context_arg*) {
-	void (*ret)(struct context_arg*);
+// Gets the traceback function to call to trace C calls.
+void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*) {
+	void (*ret)(struct cgoTracebackArg*);
+
+	EnterCriticalSection(&runtime_init_cs);
+	ret = cgo_traceback_function;
+	LeaveCriticalSection(&runtime_init_cs);
+	return ret;
+}
+
+// Call the traceback function registered with x_cgo_set_traceback_functions.
+//
+// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is
+// no C/C++ TSAN.
+void x_cgo_call_traceback_function(struct cgoTracebackArg* arg) {
+	void (*pfn)(struct cgoTracebackArg*);
+
+	pfn = _cgo_get_traceback_function();
+	if (pfn == nil) {
+		return;
+	}
+
+	(*pfn)(arg);
+}
+
+// Gets the context function to call to record the traceback context
+// when calling a Go function from C code.
+void (*(_cgo_get_context_function(void)))(struct cgoContextArg*) {
+	void (*ret)(struct cgoContextArg*);

	EnterCriticalSection(&runtime_init_cs);
	ret = cgo_context_function;

@@ -139,13 +174,38 @@ void (*(_cgo_get_context_function(void)))(struct context_arg*) {
	return ret;
}

+// Gets the symbolizer function to call to symbolize C frames.
+void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*) {
+	void (*ret)(struct cgoSymbolizerArg*);
+
+	EnterCriticalSection(&runtime_init_cs);
+	ret = cgo_symbolizer_function;
+	LeaveCriticalSection(&runtime_init_cs);
+	return ret;
+}
+
+// Call the symbolizer function registered with x_cgo_set_symbolizer_functions.
+//
+// On other platforms, this coordinates with C/C++ TSAN. On Windows, there is
+// no C/C++ TSAN.
+void x_cgo_call_symbolizer_function(struct cgoSymbolizerArg* arg) {
+	void (*pfn)(struct cgoSymbolizerArg*);
+
+	pfn = _cgo_get_symbolizer_function();
+	if (pfn == nil) {
+		return;
+	}
+
+	(*pfn)(arg);
+}
+
void _cgo_beginthread(unsigned long (__stdcall *func)(void*), void* arg) {
	int tries;
	HANDLE thandle;

	for (tries = 0; tries < 20; tries++) {
		thandle = CreateThread(NULL, 0, func, arg, 0, NULL);
-		if (thandle == 0 && GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
+		if (thandle == 0 && GetLastError() == ERROR_ACCESS_DENIED) {
			// "Insufficient resources", try again in a bit.
			//
			// Note that the first Sleep(0) is a yield.
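The CreateThread loop above retries a bounded number of times, yielding first and backing off thereafter. A minimal Go sketch of the same bounded retry-with-backoff shape (names are hypothetical; this is not runtime code):

	package main

	import (
		"fmt"
		"time"
	)

	// retry runs f up to attempts times, yielding on the first failure and
	// backing off a little longer on each subsequent one.
	func retry(attempts int, f func() error) error {
		var err error
		for i := 0; i < attempts; i++ {
			if err = f(); err == nil {
				return nil
			}
			// i == 0 sleeps for 0, which is effectively just a yield.
			time.Sleep(time.Duration(i) * time.Millisecond)
		}
		return fmt.Errorf("all %d attempts failed: %w", attempts, err)
	}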
@@ -89,15 +89,7 @@ void darwin_arm_init_thread_exception_port(void);
void darwin_arm_init_mach_exception_handler(void);

/*
- * The cgo context function. See runtime.SetCgoTraceback.
- */
-struct context_arg {
-	uintptr_t Context;
-};
-extern void (*(_cgo_get_context_function(void)))(struct context_arg*);
-
-/*
- * The argument for the cgo traceback callback. See runtime.SetCgoTraceback.
+ * The cgo traceback callback. See runtime.SetCgoTraceback.
 */
struct cgoTracebackArg {
	uintptr_t Context;

@@ -105,6 +97,38 @@ struct cgoTracebackArg {
	uintptr_t* Buf;
	uintptr_t Max;
};
+extern void (*(_cgo_get_traceback_function(void)))(struct cgoTracebackArg*);
+
+/*
+ * The cgo context callback. See runtime.SetCgoTraceback.
+ */
+struct cgoContextArg {
+	uintptr_t Context;
+};
+extern void (*(_cgo_get_context_function(void)))(struct cgoContextArg*);

/*
 * The argument for the cgo symbolizer callback. See runtime.SetCgoTraceback.
 */
struct cgoSymbolizerArg {
	uintptr_t PC;
	const char* File;
	uintptr_t Lineno;
	const char* Func;
	uintptr_t Entry;
	uintptr_t More;
	uintptr_t Data;
};
+extern void (*(_cgo_get_symbolizer_function(void)))(struct cgoSymbolizerArg*);
+
+/*
+ * The argument for x_cgo_set_traceback_functions. See runtime.SetCgoTraceback.
+ */
+struct cgoSetTracebackFunctionsArg {
+	void (*Traceback)(struct cgoTracebackArg*);
+	void (*Context)(struct cgoContextArg*);
+	void (*Symbolizer)(struct cgoSymbolizerArg*);
+};

/*
 * TSAN support. This is only useful when building with
@@ -121,11 +145,21 @@ struct cgoTracebackArg {

#ifdef CGO_TSAN

+// _cgo_tsan_acquire tells C/C++ TSAN that we are acquiring a dummy lock. We
+// call this when calling from Go to C. This is necessary because TSAN cannot
+// see the synchronization in Go. Note that C/C++ code built with TSAN is not
+// the same as the Go race detector.
+//
+// cmd/cgo generates calls to _cgo_tsan_acquire and _cgo_tsan_release. For
+// other cgo calls, manual calls are required.
+//
// These must match the definitions in yesTsanProlog in cmd/cgo/out.go.
-// In general we should call _cgo_tsan_acquire when we enter C code,
-// and call _cgo_tsan_release when we return to Go code.
+//
+// This is only necessary when calling code that might be instrumented
+// by TSAN, which mostly means system library calls that TSAN intercepts.
+//
+// See the comment in cmd/cgo/out.go for more details.

long long _cgo_sync __attribute__ ((common));
@@ -1289,30 +1289,6 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int {
	return result
}

-type MSpanQueue mSpanQueue
-
-func (q *MSpanQueue) Size() int {
-	return (*mSpanQueue)(q).n
-}
-
-func (q *MSpanQueue) Push(s *MSpan) {
-	(*mSpanQueue)(q).push((*mspan)(s))
-}
-
-func (q *MSpanQueue) Pop() *MSpan {
-	s := (*mSpanQueue)(q).pop()
-	return (*MSpan)(s)
-}
-
-func (q *MSpanQueue) TakeAll(p *MSpanQueue) {
-	(*mSpanQueue)(q).takeAll((*mSpanQueue)(p))
-}
-
-func (q *MSpanQueue) PopN(n int) MSpanQueue {
-	p := (*mSpanQueue)(q).popN(n)
-	return (MSpanQueue)(p)
-}
-
const (
	TimeHistSubBucketBits = timeHistSubBucketBits
	TimeHistNumSubBuckets = timeHistNumSubBuckets
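The wrappers removed above exposed a simple FIFO of spans for testing. A minimal sketch of that queue shape — an intrusive singly linked list with O(1) push/pop and a size counter; the span type here is a hypothetical stand-in, not the runtime's mspan:

	package main

	type span struct{ next *span }

	type spanQueue struct {
		head, tail *span
		n          int
	}

	// push appends s at the tail.
	func (q *spanQueue) push(s *span) {
		if q.tail == nil {
			q.head = s
		} else {
			q.tail.next = s
		}
		q.tail = s
		q.n++
	}

	// pop removes and returns the head, or nil if the queue is empty.
	func (q *spanQueue) pop() *span {
		s := q.head
		if s == nil {
			return nil
		}
		q.head = s.next
		if q.head == nil {
			q.tail = nil
		}
		s.next = nil
		q.n--
		return s
	}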
@@ -881,199 +881,6 @@ func TestWeakToStrongMarkTermination(t *testing.T) {
	}
}

-func TestMSpanQueue(t *testing.T) {
-	expectSize := func(t *testing.T, q *runtime.MSpanQueue, want int) {
-		t.Helper()
-		if got := q.Size(); got != want {
-			t.Errorf("expected size %d, got %d", want, got)
-		}
-	}
-	expectMSpan := func(t *testing.T, got, want *runtime.MSpan, op string) {
-		t.Helper()
-		if got != want {
-			t.Errorf("expected mspan %p from %s, got %p", want, op, got)
-		}
-	}
-	makeSpans := func(t *testing.T, n int) ([]*runtime.MSpan, func()) {
-		t.Helper()
-		spans := make([]*runtime.MSpan, 0, n)
-		for range cap(spans) {
-			spans = append(spans, runtime.AllocMSpan())
-		}
-		return spans, func() {
-			for i, s := range spans {
-				runtime.FreeMSpan(s)
-				spans[i] = nil
-			}
-		}
-	}
-	t.Run("Empty", func(t *testing.T) {
-		var q runtime.MSpanQueue
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPop", func(t *testing.T) {
-		s := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s)
-
-		var q runtime.MSpanQueue
-		q.Push(s)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPopPushPop", func(t *testing.T) {
-		s0 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s0)
-		s1 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s1)
-
-		var q runtime.MSpanQueue
-
-		// Push and pop s0.
-		q.Push(s0)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s0, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-
-		// Push and pop s1.
-		q.Push(s1)
-		expectSize(t, &q, 1)
-		expectMSpan(t, q.Pop(), s1, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("PushPushPopPop", func(t *testing.T) {
-		s0 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s0)
-		s1 := runtime.AllocMSpan()
-		defer runtime.FreeMSpan(s1)
-
-		var q runtime.MSpanQueue
-		q.Push(s0)
-		expectSize(t, &q, 1)
-		q.Push(s1)
-		expectSize(t, &q, 2)
-		expectMSpan(t, q.Pop(), s0, "pop")
-		expectMSpan(t, q.Pop(), s1, "pop")
-		expectMSpan(t, q.Pop(), nil, "pop")
-	})
-	t.Run("EmptyTakeAll", func(t *testing.T) {
-		var q runtime.MSpanQueue
-		var p runtime.MSpanQueue
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		p.TakeAll(&q)
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4TakeAll", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-
-		var p runtime.MSpanQueue
-		p.TakeAll(&q)
-		expectSize(t, &p, 4)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop3", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(3)
-		expectSize(t, &p, 3)
-		expectSize(t, &q, 1)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectMSpan(t, q.Pop(), spans[len(spans)-1], "pop")
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop0", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(0)
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 4)
-		for i := range q.Size() {
-			expectMSpan(t, q.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectSize(t, &q, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop4", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(4)
-		expectSize(t, &p, 4)
-		expectSize(t, &q, 0)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-	t.Run("Push4Pop5", func(t *testing.T) {
-		spans, free := makeSpans(t, 4)
-		defer free()
-
-		var q runtime.MSpanQueue
-		for i, s := range spans {
-			expectSize(t, &q, i)
-			q.Push(s)
-			expectSize(t, &q, i+1)
-		}
-		p := q.PopN(5)
-		expectSize(t, &p, 4)
-		expectSize(t, &q, 0)
-		for i := range p.Size() {
-			expectMSpan(t, p.Pop(), spans[i], "pop")
-		}
-		expectSize(t, &p, 0)
-		expectMSpan(t, q.Pop(), nil, "pop")
-		expectMSpan(t, p.Pop(), nil, "pop")
-	})
-}
-
func TestDetectFinalizerAndCleanupLeaks(t *testing.T) {
	got := runTestProg(t, "testprog", "DetectFinalizerAndCleanupLeaks", "GODEBUG=checkfinalizers=1")
	sp := strings.SplitN(got, "detected possible issues with cleanups and/or finalizers", 2)
@@ -127,8 +127,8 @@ const (
	_64bit = 1 << (^uintptr(0) >> 63) / 2

	// Tiny allocator parameters, see "Tiny allocator" comment in malloc.go.
-	_TinySize      = 16
-	_TinySizeClass = int8(2)
+	_TinySize      = gc.TinySize
+	_TinySizeClass = int8(gc.TinySizeClass)

	_FixAllocChunk = 16 << 10 // Chunk size for FixAlloc


@@ -1080,6 +1080,12 @@ func (c *mcache) nextFree(spc spanClass) (v gclinkptr, s *mspan, checkGCTrigger
// at scale.
const doubleCheckMalloc = false

+// sizeSpecializedMallocEnabled is the set of conditions where we enable the size-specialized
+// mallocgc implementation: the experiment must be enabled, and none of the sanitizers should
+// be enabled. The tables used to select the size-specialized malloc function do not compile
+// properly on plan9, so size-specialized malloc is also disabled on plan9.
+const sizeSpecializedMallocEnabled = goexperiment.SizeSpecializedMalloc && GOOS != "plan9" && !asanenabled && !raceenabled && !msanenabled && !valgrindenabled
+
// Allocate an object of size bytes.
// Small objects are allocated from the per-P cache's free lists.
// Large objects (> 32 kB) are allocated straight from the heap.

@@ -1110,6 +1116,17 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
		return unsafe.Pointer(&zerobase)
	}

+	if sizeSpecializedMallocEnabled && heapBitsInSpan(size) {
+		if typ == nil || !typ.Pointers() {
+			return mallocNoScanTable[size](size, typ, needzero)
+		} else {
+			if !needzero {
+				throw("objects with pointers must be zeroed")
+			}
+			return mallocScanTable[size](size, typ, needzero)
+		}
+	}
+
	// It's possible for any malloc to trigger sweeping, which may in
	// turn queue finalizers. Record this dynamic lock edge.
	// N.B. Compiled away if lockrank experiment is not enabled.
@@ -1138,25 +1155,41 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	// Actually do the allocation.
	var x unsafe.Pointer
	var elemsize uintptr
-	if size <= maxSmallSize-gc.MallocHeaderSize {
-		if typ == nil || !typ.Pointers() {
-			if size < maxTinySize {
-				x, elemsize = mallocgcTiny(size, typ)
-			} else {
-				x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
-			}
-		} else {
-			if !needzero {
-				throw("objects with pointers must be zeroed")
-			}
-			if heapBitsInSpan(size) {
-				x, elemsize = mallocgcSmallScanNoHeader(size, typ)
-			} else {
-				x, elemsize = mallocgcSmallScanHeader(size, typ)
-			}
-		}
-	} else {
-		x, elemsize = mallocgcLarge(size, typ, needzero)
+	if sizeSpecializedMallocEnabled {
+		// we know that heapBitsInSpan is true.
+		if size <= maxSmallSize-gc.MallocHeaderSize {
+			if typ == nil || !typ.Pointers() {
+				x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
+			} else {
+				if !needzero {
+					throw("objects with pointers must be zeroed")
+				}
+				x, elemsize = mallocgcSmallScanHeader(size, typ)
+			}
+		} else {
+			x, elemsize = mallocgcLarge(size, typ, needzero)
+		}
+	} else {
+		if size <= maxSmallSize-gc.MallocHeaderSize {
+			if typ == nil || !typ.Pointers() {
+				if size < maxTinySize {
+					x, elemsize = mallocgcTiny(size, typ)
+				} else {
+					x, elemsize = mallocgcSmallNoscan(size, typ, needzero)
+				}
+			} else {
+				if !needzero {
+					throw("objects with pointers must be zeroed")
+				}
+				if heapBitsInSpan(size) {
+					x, elemsize = mallocgcSmallScanNoHeader(size, typ)
+				} else {
+					x, elemsize = mallocgcSmallScanHeader(size, typ)
+				}
+			}
+		} else {
+			x, elemsize = mallocgcLarge(size, typ, needzero)
+		}
	}

	// Notify sanitizers, if enabled.
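The table dispatch added above replaces size-class arithmetic on the hot path with one indexed call. A minimal sketch of the idea, with hypothetical names rather than the runtime's actual tables:

	package main

	import "fmt"

	// allocFn is a hypothetical allocator specialized for one size.
	type allocFn func() []byte

	// buildTable precomputes one function per byte size up to max, so callers
	// dispatch with a single table load instead of size-class lookups.
	func buildTable(max int) []allocFn {
		t := make([]allocFn, max+1)
		for size := 0; size <= max; size++ {
			s := size // capture a per-iteration copy
			t[s] = func() []byte { return make([]byte, s) }
		}
		return t
	}

	func main() {
		table := buildTable(512)
		fmt.Println(len(table[96]())) // one indexed call; prints 96
	}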
src/runtime/malloc_generated.go (new file, 8468 lines; diff suppressed because it is too large)
src/runtime/malloc_stubs.go (new file, 586 lines)

@@ -0,0 +1,586 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains stub functions that are not meant to be called directly,
// but that will be assembled together using the inlining logic in runtime/_mkmalloc
// to produce a full mallocgc function that's specialized for a span class
// or specific size in the case of the tiny allocator.
//
// To assemble a mallocgc function, the mallocStub function is cloned, and the call to
// inlinedMalloc is replaced with the inlined body of smallScanNoHeaderStub,
// smallNoScanStub or tinyStub, depending on the parameters being specialized.
//
// The size_ (for the tiny case) and elemsize_, sizeclass_, and noscanint_ (for all three cases)
// identifiers are replaced with the value of the parameter in the specialized case.
// The nextFreeFastStub, nextFreeFastTiny, heapSetTypeNoHeaderStub, and writeHeapBitsSmallStub
// functions are also inlined by _mkmalloc.

package runtime

import (
	"internal/goarch"
	"internal/runtime/sys"
	"unsafe"
)

// These identifiers will all be replaced by the inliner. So their values don't
// really matter: they just need to be set so that the stub functions, which
// will never be used on their own, can compile. elemsize_ can't be set to
// zero because we divide by it in nextFreeFastTiny, and the compiler would
// complain about a division by zero. Its replaced value will always be greater
// than zero.
const elemsize_ = 8
const sizeclass_ = 0
const noscanint_ = 0
const size_ = 0

func malloc0(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// Short-circuit zero-sized allocation requests.
	return unsafe.Pointer(&zerobase)
}

func mallocPanic(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	panic("not defined for sizeclass")
}

func mallocStub(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
	if doubleCheckMalloc {
		if gcphase == _GCmarktermination {
			throw("mallocgc called with gcphase == _GCmarktermination")
		}
	}

	// It's possible for any malloc to trigger sweeping, which may in
	// turn queue finalizers. Record this dynamic lock edge.
	// N.B. Compiled away if lockrank experiment is not enabled.
	lockRankMayQueueFinalizer()

	// Pre-malloc debug hooks.
	if debug.malloc {
		if x := preMallocgcDebug(size, typ); x != nil {
			return x
		}
	}

	// Assist the GC if needed.
	if gcBlackenEnabled != 0 {
		deductAssistCredit(size)
	}

	// Actually do the allocation.
	x, elemsize := inlinedMalloc(size, typ, needzero)

	// Adjust our GC assist debt to account for internal fragmentation.
	if gcBlackenEnabled != 0 && elemsize != 0 {
		if assistG := getg().m.curg; assistG != nil {
			assistG.gcAssistBytes -= int64(elemsize - size)
		}
	}

	// Post-malloc debug hooks.
	if debug.malloc {
		postMallocgcDebug(x, elemsize, typ)
	}
	return x
}

// inlinedMalloc will never be called. It is defined just so that the compiler can compile
// the mallocStub function, which will also never be called, but instead used as a template
// to generate a size-specialized malloc function. The call to inlinedMalloc in mallocStub
// will be replaced with the inlined body of smallScanNoHeaderStub, smallNoScanStub, or tinyStub
// when generating the size-specialized malloc function. See the comment at the top of this
// file for more information.
func inlinedMalloc(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	return unsafe.Pointer(uintptr(0)), 0
}
func doubleCheckSmallScanNoHeader(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ == nil || !typ.Pointers() {
		throw("noscan allocated in scan-only path")
	}
	if !heapBitsInSpan(size) {
		throw("heap bits in not in span for non-header-only path")
	}
}

func smallScanNoHeaderStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallScanNoHeader(size, typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}
	if goarch.PtrSize == 8 && sizeclass == 1 {
		// initHeapBits already set the pointer bits for the 8-byte sizeclass
		// on 64-bit platforms.
		c.scanAlloc += 8
	} else {
		dataSize := size // make the inliner happy
		x := uintptr(x)
		scanSize := heapSetTypeNoHeaderStub(x, dataSize, typ, span)
		c.scanAlloc += scanSize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	return x, elemsize
}
func doubleCheckSmallNoScan(typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan type for noscan alloc")
	}
}

func smallNoScanStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	// TODO(matloob): Add functionality to mkmalloc to allow us to inline a non-constant
	// sizeclass_ and elemsize_ value (instead just set to the expressions to look up the size class
	// and elemsize. We'd also need to teach mkmalloc that values that are touched by these (specifically
	// spc below) should turn into vars. This would allow us to generate mallocgcSmallNoScan itself,
	// so that its code could not diverge from the generated functions.
	const sizeclass = sizeclass_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckSmallNoScan(typ, mp)
	}
	mp.mallocing = 1

	checkGCTrigger := false
	c := getMCache(mp)
	const spc = spanClass(sizeclass<<1) | spanClass(noscanint_)
	span := c.alloc[spc]
	v := nextFreeFastStub(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(spc)
	}
	x := unsafe.Pointer(v)
	if needzero && span.needzero != 0 {
		memclrNoHeapPointers(x, elemsize)
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}
	return x, elemsize
}
func doubleCheckTiny(size uintptr, typ *_type, mp *m) {
	if mp.mallocing != 0 {
		throw("malloc deadlock")
	}
	if mp.gsignal == getg() {
		throw("malloc during signal")
	}
	if typ != nil && typ.Pointers() {
		throw("expected noscan for tiny alloc")
	}
}

func tinyStub(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uintptr) {
	const constsize = size_
	const elemsize = elemsize_

	// Set mp.mallocing to keep from being preempted by GC.
	mp := acquirem()
	if doubleCheckMalloc {
		doubleCheckTiny(constsize, typ, mp)
	}
	mp.mallocing = 1

	// Tiny allocator.
	//
	// Tiny allocator combines several tiny allocation requests
	// into a single memory block. The resulting memory block
	// is freed when all subobjects are unreachable. The subobjects
	// must be noscan (don't have pointers), this ensures that
	// the amount of potentially wasted memory is bounded.
	//
	// Size of the memory block used for combining (maxTinySize) is tunable.
	// Current setting is 16 bytes, which relates to 2x worst case memory
	// wastage (when all but one subobjects are unreachable).
	// 8 bytes would result in no wastage at all, but provides less
	// opportunities for combining.
	// 32 bytes provides more opportunities for combining,
	// but can lead to 4x worst case wastage.
	// The best case winning is 8x regardless of block size.
	//
	// Objects obtained from tiny allocator must not be freed explicitly.
	// So when an object will be freed explicitly, we ensure that
	// its size >= maxTinySize.
	//
	// SetFinalizer has a special case for objects potentially coming
	// from tiny allocator, it such case it allows to set finalizers
	// for an inner byte of a memory block.
	//
	// The main targets of tiny allocator are small strings and
	// standalone escaping variables. On a json benchmark
	// the allocator reduces number of allocations by ~12% and
	// reduces heap size by ~20%.
	c := getMCache(mp)
	off := c.tinyoffset
	// Align tiny pointer for required (conservative) alignment.
	if constsize&7 == 0 {
		off = alignUp(off, 8)
	} else if goarch.PtrSize == 4 && constsize == 12 {
		// Conservatively align 12-byte objects to 8 bytes on 32-bit
		// systems so that objects whose first field is a 64-bit
		// value is aligned to 8 bytes and does not cause a fault on
		// atomic access. See issue 37262.
		// TODO(mknyszek): Remove this workaround if/when issue 36606
		// is resolved.
		off = alignUp(off, 8)
	} else if constsize&3 == 0 {
		off = alignUp(off, 4)
	} else if constsize&1 == 0 {
		off = alignUp(off, 2)
	}
	if off+constsize <= maxTinySize && c.tiny != 0 {
		// The object fits into existing tiny block.
		x := unsafe.Pointer(c.tiny + off)
		c.tinyoffset = off + constsize
		c.tinyAllocs++
		mp.mallocing = 0
		releasem(mp)
		return x, 0
	}
	// Allocate a new maxTinySize block.
	checkGCTrigger := false
	span := c.alloc[tinySpanClass]
	v := nextFreeFastTiny(span)
	if v == 0 {
		v, span, checkGCTrigger = c.nextFree(tinySpanClass)
	}
	x := unsafe.Pointer(v)
	(*[2]uint64)(x)[0] = 0 // Always zero
	(*[2]uint64)(x)[1] = 0
	// See if we need to replace the existing tiny block with the new one
	// based on amount of remaining free space.
	if !raceenabled && (constsize < c.tinyoffset || c.tiny == 0) {
		// Note: disabled when race detector is on, see comment near end of this function.
		c.tiny = uintptr(x)
		c.tinyoffset = constsize
	}

	// Ensure that the stores above that initialize x to
	// type-safe memory and set the heap bits occur before
	// the caller can make x observable to the garbage
	// collector. Otherwise, on weakly ordered machines,
	// the garbage collector could follow a pointer to x,
	// but see uninitialized memory or stale heap bits.
	publicationBarrier()

	if writeBarrier.enabled {
		// Allocate black during GC.
		// All slots hold nil so no scanning is needed.
		// This may be racing with GC so do it atomically if there can be
		// a race marking the bit.
		gcmarknewobject(span, uintptr(x))
	} else {
		// Track the last free index before the mark phase. This field
		// is only used by the garbage collector. During the mark phase
		// this is used by the conservative scanner to filter out objects
		// that are both free and recently-allocated. It's safe to do that
		// because we allocate-black if the GC is enabled. The conservative
		// scanner produces pointers out of thin air, so without additional
		// synchronization it might otherwise observe a partially-initialized
		// object, which could crash the program.
		span.freeIndexForScan = span.freeindex
	}

	// Note cache c only valid while m acquired; see #47302
	//
	// N.B. Use the full size because that matches how the GC
	// will update the mem profile on the "free" side.
	//
	// TODO(mknyszek): We should really count the header as part
	// of gc_sys or something. The code below just pretends it is
	// internal fragmentation and matches the GC's accounting by
	// using the whole allocation slot.
	c.nextSample -= int64(elemsize)
	if c.nextSample < 0 || MemProfileRate != c.memProfRate {
		profilealloc(mp, x, elemsize)
	}
	mp.mallocing = 0
	releasem(mp)

	if checkGCTrigger {
		if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
			gcStart(t)
		}
	}

	if raceenabled {
		// Pad tinysize allocations so they are aligned with the end
		// of the tinyalloc region. This ensures that any arithmetic
		// that goes off the top end of the object will be detectable
		// by checkptr (issue 38872).
		// Note that we disable tinyalloc when raceenabled for this to work.
		// TODO: This padding is only performed when the race detector
		// is enabled. It would be nice to enable it if any package
		// was compiled with checkptr, but there's no easy way to
		// detect that (especially at compile time).
		// TODO: enable this padding for all allocations, not just
		// tinyalloc ones. It's tricky because of pointer maps.
		// Maybe just all noscan objects?
		x = add(x, elemsize-constsize)
	}
	return x, elemsize
}
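// Sketch (illustrative annotation, not part of this CL): the alignment
// ladder tinyStub applies above reduces to the following, where
// alignUpSketch is a hypothetical stand-in for the runtime's alignUp and
// a must be a power of two.
func alignUpSketch(n, a uintptr) uintptr { return (n + a - 1) &^ (a - 1) }

func tinyAlignSketch(off, size uintptr) uintptr {
	switch {
	case size&7 == 0:
		return alignUpSketch(off, 8) // 8-byte multiples: 8-byte aligned
	case size&3 == 0:
		return alignUpSketch(off, 4) // 4-byte multiples: 4-byte aligned
	case size&1 == 0:
		return alignUpSketch(off, 2) // even sizes: 2-byte aligned
	}
	return off // odd sizes need no alignment
}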
// TODO(matloob): Should we let the go compiler inline this instead of using mkmalloc?
// We won't be able to use elemsize_ but that's probably ok.
func nextFreeFastTiny(span *mspan) gclinkptr {
	const nbytes = 8192
	const nelems = uint16((nbytes - unsafe.Sizeof(spanInlineMarkBits{})) / elemsize_)
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}

func nextFreeFastStub(span *mspan) gclinkptr {
	var nextFreeFastResult gclinkptr
	if span.allocCache != 0 {
		theBit := sys.TrailingZeros64(span.allocCache) // Is there a free object in the allocCache?
		result := span.freeindex + uint16(theBit)
		if result < span.nelems {
			freeidx := result + 1
			if !(freeidx%64 == 0 && freeidx != span.nelems) {
				span.allocCache >>= uint(theBit + 1)
				span.freeindex = freeidx
				span.allocCount++
				nextFreeFastResult = gclinkptr(uintptr(result)*elemsize_ + span.base())
			}
		}
	}
	return nextFreeFastResult
}
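// Sketch (illustrative annotation, not part of this CL): the allocCache scan
// that nextFreeFastTiny and nextFreeFastStub perform above, reduced to a
// standalone form. Set bits mark free slots; TrailingZeros64 finds the lowest
// one, and the cache is shifted past it so freeindex tracks the slot that
// bit 0 of the cache refers to.
func nextFreeSketch(cache uint64, freeindex int) (idx int, newCache uint64, newFreeindex int) {
	if cache == 0 {
		return -1, 0, freeindex // nothing free in the cached window
	}
	bit := sys.TrailingZeros64(cache)
	idx = freeindex + bit
	return idx, cache >> uint(bit+1), idx + 1
}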
func heapSetTypeNoHeaderStub(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
	if doubleCheckHeapSetType && (!heapBitsInSpan(dataSize) || !heapBitsInSpan(elemsize_)) {
		throw("tried to write heap bits, but no heap bits in span")
	}
	scanSize := writeHeapBitsSmallStub(span, x, dataSize, typ)
	if doubleCheckHeapSetType {
		doubleCheckHeapType(x, dataSize, typ, nil, span)
	}
	return scanSize
}

// writeHeapBitsSmallStub writes the heap bits for small objects whose ptr/scalar data is
// stored as a bitmap at the end of the span.
//
// Assumes dataSize is <= ptrBits*goarch.PtrSize. x must be a pointer into the span.
// heapBitsInSpan(dataSize) must be true. dataSize must be >= typ.Size_.
//
//go:nosplit
func writeHeapBitsSmallStub(span *mspan, x, dataSize uintptr, typ *_type) uintptr {
	// The objects here are always really small, so a single load is sufficient.
	src0 := readUintptr(getGCMask(typ))

	const elemsize = elemsize_

	// Create repetitions of the bitmap if we have a small slice backing store.
	scanSize := typ.PtrBytes
	src := src0
	if typ.Size_ == goarch.PtrSize {
		src = (1 << (dataSize / goarch.PtrSize)) - 1
	} else {
		// N.B. We rely on dataSize being an exact multiple of the type size.
		// The alternative is to be defensive and mask out src to the length
		// of dataSize. The purpose is to save on one additional masking operation.
		if doubleCheckHeapSetType && !asanenabled && dataSize%typ.Size_ != 0 {
			throw("runtime: (*mspan).writeHeapBitsSmall: dataSize is not a multiple of typ.Size_")
		}
		for i := typ.Size_; i < dataSize; i += typ.Size_ {
			src |= src0 << (i / goarch.PtrSize)
			scanSize += typ.Size_
		}
	}

	// Since we're never writing more than one uintptr's worth of bits, we're either going
	// to do one or two writes.
	dstBase, _ := spanHeapBitsRange(span.base(), pageSize, elemsize)
	dst := unsafe.Pointer(dstBase)
	o := (x - span.base()) / goarch.PtrSize
	i := o / ptrBits
	j := o % ptrBits
	const bits uintptr = elemsize / goarch.PtrSize
	// In the if statement below, we have to do two uintptr writes if the bits
	// we need to write straddle across two different memory locations. But if
	// the number of bits we're writing divides evenly into the number of bits
	// in the uintptr we're writing, this can never happen. Since bitsIsPowerOfTwo
	// is a compile-time constant in the generated code, in the case where the size is
	// a power of two less than or equal to ptrBits, the compiler can remove the
	// 'two writes' branch of the if statement and always do only one write without
	// the check.
	const bitsIsPowerOfTwo = bits&(bits-1) == 0
	if bits > ptrBits || (!bitsIsPowerOfTwo && j+bits > ptrBits) {
		// Two writes.
		bits0 := ptrBits - j
		bits1 := bits - bits0
		dst0 := (*uintptr)(add(dst, (i+0)*goarch.PtrSize))
		dst1 := (*uintptr)(add(dst, (i+1)*goarch.PtrSize))
		*dst0 = (*dst0)&(^uintptr(0)>>bits0) | (src << j)
		*dst1 = (*dst1)&^((1<<bits1)-1) | (src >> bits0)
	} else {
		// One write.
		dst := (*uintptr)(add(dst, i*goarch.PtrSize))
		*dst = (*dst)&^(((1<<(min(bits, ptrBits)))-1)<<j) | (src << j) // We're taking the min so this compiles on 32 bit platforms. But if bits > ptrbits we always take the other branch
	}

	const doubleCheck = false
	if doubleCheck {
		writeHeapBitsDoubleCheck(span, x, dataSize, src, src0, i, j, bits, typ)
	}
	return scanSize
}
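// Sketch (illustrative annotation, not part of this CL): the one-write/
// two-write logic above, reduced to a standalone form. Writing n bits of src
// at bit offset off may straddle two words; when n divides the word size
// evenly, it never does, which is the case the generated code exploits.
func setBitsSketch(dst []uint64, off, n uint, src uint64) {
	i, j := off/64, off%64
	if j+n <= 64 {
		// One write. For n == 64 (so j == 0), 1<<64 wraps to 0 and the
		// mask becomes all ones, as intended.
		mask := (uint64(1)<<n - 1) << j
		dst[i] = dst[i]&^mask | (src << j & mask)
		return
	}
	// Two writes: low n0 bits of src into word i, the rest into word i+1.
	n0 := 64 - j
	dst[i] = dst[i]&(^uint64(0)>>n0) | (src << j)
	dst[i+1] = dst[i+1]&^(uint64(1)<<(n-n0)-1) | (src>>n0)&(uint64(1)<<(n-n0)-1)
}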
func writeHeapBitsDoubleCheck(span *mspan, x, dataSize, src, src0, i, j, bits uintptr, typ *_type) {
	srcRead := span.heapBitsSmallForAddr(x)
	if srcRead != src {
		print("runtime: x=", hex(x), " i=", i, " j=", j, " bits=", bits, "\n")
		print("runtime: dataSize=", dataSize, " typ.Size_=", typ.Size_, " typ.PtrBytes=", typ.PtrBytes, "\n")
		print("runtime: src0=", hex(src0), " src=", hex(src), " srcRead=", hex(srcRead), "\n")
		throw("bad pointer bits written for small object")
	}
}
src/runtime/malloc_tables_generated.go (new file, 1038 lines; diff suppressed because it is too large)
src/runtime/malloc_tables_plan9.go (new file, 14 lines)

@@ -0,0 +1,14 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build plan9

package runtime

import "unsafe"

var (
	mallocScanTable   []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer
	mallocNoScanTable []func(size uintptr, typ *_type, needzero bool) unsafe.Pointer
)
@@ -452,3 +452,13 @@ func BenchmarkGoroutineIdle(b *testing.B) {
	close(quit)
	time.Sleep(10 * time.Millisecond)
}
+
+func TestMkmalloc(t *testing.T) {
+	testenv.MustHaveGoRun(t)
+	testenv.MustHaveExternalNetwork(t) // To download the golang.org/x/tools dependency.
+	output, err := exec.Command("go", "-C", "_mkmalloc", "test").CombinedOutput()
+	t.Logf("test output:\n%s", output)
+	if err != nil {
+		t.Errorf("_mkmalloc tests failed: %v", err)
+	}
+}
@@ -68,7 +68,7 @@ func startCheckmarks() {

// endCheckmarks ends the checkmarks phase.
func endCheckmarks() {
-	if gcMarkWorkAvailable(nil) {
+	if !gcIsMarkDone() {
		throw("GC work not flushed")
	}
	useCheckmark = false
@@ -326,7 +326,7 @@ type workType struct {
	full  lfstack          // lock-free list of full blocks workbuf
	_     cpu.CacheLinePad // prevents false-sharing between full and empty
	empty lfstack          // lock-free list of empty blocks workbuf
-	_     cpu.CacheLinePad // prevents false-sharing between empty and nproc/nwait
+	_     cpu.CacheLinePad // prevents false-sharing between empty and wbufSpans

	wbufSpans struct {
		lock mutex

@@ -337,12 +337,24 @@ type workType struct {
		// one of the workbuf lists.
		busy mSpanList
	}
-	_ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanq
+	_ cpu.CacheLinePad // prevents false-sharing between wbufSpans and spanWorkMask

-	// Global queue of spans to scan.
+	// spanqMask is a bitmap indicating which Ps have local work worth stealing.
+	// Set or cleared by the owning P, cleared by stealing Ps.
+	//
+	// spanqMask is like a proxy for a global queue. An important invariant is that
+	// forced flushing like gcw.dispose must set this bit on any P that has local
+	// span work.
+	spanqMask pMask
+	_         cpu.CacheLinePad // prevents false-sharing between spanqMask and everything else
+
+	// List of all spanSPMCs.
	//
	// Only used if goexperiment.GreenTeaGC.
-	spanq spanQueue
+	spanSPMCs struct {
+		lock mutex // no lock rank because it's a leaf lock (see mklockrank.go).
+		all  *spanSPMC
+	}

	// Restore 64-bit alignment on 32-bit.
	// _ uint32
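spanqMask trades the old global span queue for a per-P bitmap: a P publishes a bit when it has stealable local spans, and a stealer that drains it clears the bit. A minimal sketch of such a mask — hypothetical code, assuming the sync/atomic And/Or helpers added in Go 1.23:

	package main

	import "sync/atomic"

	// pMask holds one bit per P.
	type pMask []uint32

	func (m pMask) set(id int)   { atomic.OrUint32(&m[id/32], 1<<(id%32)) }
	func (m pMask) clear(id int) { atomic.AndUint32(&m[id/32], ^(uint32(1) << (id % 32))) }
	func (m pMask) read(id int) bool {
		return atomic.LoadUint32(&m[id/32])&(1<<(id%32)) != 0
	}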
@@ -711,8 +723,9 @@ func gcStart(trigger gcTrigger) {
		traceRelease(trace)
	}

-	// Check that all Ps have finished deferred mcache flushes.
+	// Check and setup per-P state.
	for _, p := range allp {
+		// Check that all Ps have finished deferred mcache flushes.
		if fg := p.mcache.flushGen.Load(); fg != mheap_.sweepgen {
			println("runtime: p", p.id, "flushGen", fg, "!= sweepgen", mheap_.sweepgen)
			throw("p mcache not flushed")

@@ -869,10 +882,11 @@ var gcDebugMarkDone struct {
// all local work to the global queues where it can be discovered by
// other workers.
//
+// All goroutines performing GC work must call gcBeginWork to signal
+// that they're executing GC work. They must call gcEndWork when done.
// This should be called when all local mark work has been drained and
-// there are no remaining workers. Specifically, when
-//
-//	work.nwait == work.nproc && !gcMarkWorkAvailable(p)
+// there are no remaining workers. Specifically, when gcEndWork returns
+// true.
//
// The calling context must be preemptible.
//

@@ -896,7 +910,7 @@ top:
	// empty before performing the ragged barrier. Otherwise,
	// there could be global work that a P could take after the P
	// has passed the ragged barrier.
-	if !(gcphase == _GCmark && work.nwait == work.nproc && !gcMarkWorkAvailable(nil)) {
+	if !(gcphase == _GCmark && gcIsMarkDone()) {
		semrelease(&work.markDoneSema)
		return
	}

@@ -922,6 +936,7 @@ top:
			// TODO(austin): Break up these workbufs to
			// better distribute work.
			pp.gcw.dispose()
+
			// Collect the flushedWork flag.
			if pp.gcw.flushedWork {
				atomic.Xadd(&gcMarkDoneFlushed, 1)

@@ -1514,11 +1529,7 @@ func gcBgMarkWorker(ready chan struct{}) {
			trackLimiterEvent = pp.limiterEvent.start(limiterEventIdleMarkWork, startTime)
		}

-		decnwait := atomic.Xadd(&work.nwait, -1)
-		if decnwait == work.nproc {
-			println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
-			throw("work.nwait was > work.nproc")
-		}
+		gcBeginWork()

		systemstack(func() {
			// Mark our goroutine preemptible so its stack can be scanned or observed

@@ -1570,15 +1581,6 @@ func gcBgMarkWorker(ready chan struct{}) {
			atomic.Xaddint64(&pp.gcFractionalMarkTime, duration)
		}

-		// Was this the last worker and did we run out
-		// of work?
-		incnwait := atomic.Xadd(&work.nwait, +1)
-		if incnwait > work.nproc {
-			println("runtime: p.gcMarkWorkerMode=", pp.gcMarkWorkerMode,
-				"work.nwait=", incnwait, "work.nproc=", work.nproc)
-			throw("work.nwait > work.nproc")
-		}
-
		// We'll releasem after this point and thus this P may run
		// something else. We must clear the worker mode to avoid
		// attributing the mode to a different (non-worker) G in

@@ -1587,7 +1589,7 @@ func gcBgMarkWorker(ready chan struct{}) {

		// If this worker reached a background mark completion
		// point, signal the main GC goroutine.
-		if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
+		if gcEndWork() {
			// We don't need the P-local buffers here, allow
			// preemption because we may schedule like a regular
			// goroutine in gcMarkDone (block on locks, etc).

@@ -1599,20 +1601,40 @@ func gcBgMarkWorker(ready chan struct{}) {
		}
	}

-// gcMarkWorkAvailable reports whether executing a mark worker
-// on p is potentially useful. p may be nil, in which case it only
-// checks the global sources of work.
-func gcMarkWorkAvailable(p *p) bool {
+// gcShouldScheduleWorker reports whether executing a mark worker
+// on p is potentially useful. p may be nil.
+func gcShouldScheduleWorker(p *p) bool {
	if p != nil && !p.gcw.empty() {
		return true
	}
-	if !work.full.empty() || !work.spanq.empty() {
-		return true // global work available
+	return gcMarkWorkAvailable()
+}
+
+// gcIsMarkDone reports whether the mark phase is (probably) done.
+func gcIsMarkDone() bool {
+	return work.nwait == work.nproc && !gcMarkWorkAvailable()
+}
+
+// gcBeginWork signals to the garbage collector that a new worker is
+// about to process GC work.
+func gcBeginWork() {
+	decnwait := atomic.Xadd(&work.nwait, -1)
+	if decnwait == work.nproc {
+		println("runtime: work.nwait=", decnwait, "work.nproc=", work.nproc)
+		throw("work.nwait was > work.nproc")
	}
-	if work.markrootNext < work.markrootJobs {
-		return true // root scan work available
+}
+
+// gcEndWork signals to the garbage collector that a new worker has just finished
+// its work. It reports whether it was the last worker and there's no more work
+// to do. If it returns true, the caller must call gcMarkDone.
+func gcEndWork() (last bool) {
+	incnwait := atomic.Xadd(&work.nwait, +1)
+	if incnwait > work.nproc {
+		println("runtime: work.nwait=", incnwait, "work.nproc=", work.nproc)
+		throw("work.nwait > work.nproc")
	}
-	return false
+	return incnwait == work.nproc && !gcMarkWorkAvailable()
}
// gcMark runs the mark (or, for concurrent GC, mark termination)
|
||||
|
|
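Aside: gcBeginWork and gcEndWork are a wait-counter handshake: work.nwait counts idle workers, so nwait == nproc with nothing queued is exactly the mark-completion condition. The following self-contained sketch models the same protocol outside the runtime (all names here, pool, beginWork, endWork, queue, are invented for illustration; the real gcEndWork consults gcMarkWorkAvailable rather than a channel length):

	package main

	import (
		"fmt"
		"sync"
		"sync/atomic"
	)

	type pool struct {
		nproc int32    // number of potential workers
		nwait int32    // number of idle workers; starts equal to nproc
		queue chan int // stand-in for the GC work queues
	}

	// beginWork mirrors gcBeginWork: one fewer idle worker.
	func (p *pool) beginWork() {
		if atomic.AddInt32(&p.nwait, -1) == p.nproc {
			panic("nwait was > nproc")
		}
	}

	// endWork mirrors gcEndWork: reports whether this was the last active
	// worker and no work remains, i.e. the completion condition.
	func (p *pool) endWork() bool {
		n := atomic.AddInt32(&p.nwait, +1)
		if n > p.nproc {
			panic("nwait > nproc")
		}
		return n == p.nproc && len(p.queue) == 0
	}

	func main() {
		p := &pool{nproc: 4, nwait: 4, queue: make(chan int, 16)}
		for i := 0; i < 8; i++ {
			p.queue <- i
		}
		var wg sync.WaitGroup
		for w := 0; w < int(p.nproc); w++ {
			wg.Add(1)
			go func() {
				defer wg.Done()
				p.beginWork()
				for {
					select {
					case it := <-p.queue:
						_ = it // "scan" the item
					default:
						if p.endWork() {
							fmt.Println("last worker out, no work left")
						}
						return
					}
				}
			}()
		}
		wg.Wait()
	}

Only the goroutine whose increment brings the counter back to nproc can observe the completion condition, which is why exactly one caller ends up responsible for calling gcMarkDone.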
@@ -1625,8 +1647,8 @@ func gcMark(startTime int64) {
 	work.tstart = startTime
 
 	// Check that there's no marking work remaining.
-	if work.full != 0 || work.markrootNext < work.markrootJobs || !work.spanq.empty() {
-		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, " spanq.n=", work.spanq.size(), "\n")
+	if work.full != 0 || work.markrootNext < work.markrootJobs {
+		print("runtime: full=", hex(work.full), " next=", work.markrootNext, " jobs=", work.markrootJobs, " nDataRoots=", work.nDataRoots, " nBSSRoots=", work.nBSSRoots, " nSpanRoots=", work.nSpanRoots, " nStackRoots=", work.nStackRoots, "\n")
 		panic("non-empty mark queue after concurrent mark")
 	}
 
@@ -1742,10 +1764,12 @@ func gcSweep(mode gcMode) bool {
 		// Sweep all spans eagerly.
 		for sweepone() != ^uintptr(0) {
 		}
-		// Free workbufs eagerly.
+		// Free workbufs and span rings eagerly.
 		prepareFreeWorkbufs()
 		for freeSomeWbufs(false) {
 		}
+		for freeSomeSpanSPMCs(false) {
+		}
 		// All "free" events for this mark/sweep cycle have
 		// now happened, so we can make this profile cycle
 		// available immediately.
@@ -666,6 +666,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
 		gp.gcAssistBytes = 0
 		return
 	}
+
 	// Track time spent in this assist. Since we're on the
 	// system stack, this is non-preemptible, so we can
 	// just measure start and end time.
@@ -675,11 +676,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
 	startTime := nanotime()
 	trackLimiterEvent := gp.m.p.ptr().limiterEvent.start(limiterEventMarkAssist, startTime)
 
-	decnwait := atomic.Xadd(&work.nwait, -1)
-	if decnwait == work.nproc {
-		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
-		throw("nwait > work.nprocs")
-	}
+	gcBeginWork()
 
 	// gcDrainN requires the caller to be preemptible.
 	casGToWaitingForSuspendG(gp, _Grunning, waitReasonGCAssistMarking)
@@ -702,14 +699,7 @@ func gcAssistAlloc1(gp *g, scanWork int64) {
 
 	// If this is the last worker and we ran out of work,
 	// signal a completion point.
-	incnwait := atomic.Xadd(&work.nwait, +1)
-	if incnwait > work.nproc {
-		println("runtime: work.nwait=", incnwait,
-			"work.nproc=", work.nproc)
-		throw("work.nwait > work.nproc")
-	}
-
-	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
+	if gcEndWork() {
 		// This has reached a background completion point. Set
 		// gp.param to a non-nil value to indicate this. It
 		// doesn't matter what we set it to (it just has to be
@@ -1242,14 +1232,18 @@ func gcDrain(gcw *gcWork, flags gcDrainFlags) {
 		var b uintptr
 		var s objptr
 		if b = gcw.tryGetObjFast(); b == 0 {
-			if s = gcw.tryGetSpan(false); s == 0 {
+			if s = gcw.tryGetSpanFast(); s == 0 {
 				if b = gcw.tryGetObj(); b == 0 {
-					// Flush the write barrier
-					// buffer; this may create
-					// more work.
-					wbBufFlush()
-					if b = gcw.tryGetObj(); b == 0 {
-						s = gcw.tryGetSpan(true)
+					if s = gcw.tryGetSpan(); s == 0 {
+						// Flush the write barrier
+						// buffer; this may create
+						// more work.
+						wbBufFlush()
+						if b = gcw.tryGetObj(); b == 0 {
+							if s = gcw.tryGetSpan(); s == 0 {
+								s = gcw.tryStealSpan()
+							}
+						}
 					}
 				}
 			}
 		}
@@ -1338,22 +1332,26 @@ func gcDrainN(gcw *gcWork, scanWork int64) int64 {
 		var b uintptr
 		var s objptr
 		if b = gcw.tryGetObjFast(); b == 0 {
-			if s = gcw.tryGetSpan(false); s == 0 {
+			if s = gcw.tryGetSpanFast(); s == 0 {
 				if b = gcw.tryGetObj(); b == 0 {
-					// Flush the write barrier
-					// buffer; this may create
-					// more work.
-					wbBufFlush()
-					if b = gcw.tryGetObj(); b == 0 {
-						// Try to do a root job.
-						if work.markrootNext < work.markrootJobs {
-							job := atomic.Xadd(&work.markrootNext, +1) - 1
-							if job < work.markrootJobs {
-								workFlushed += markroot(gcw, job, false)
-								continue
-							}
-						}
-						s = gcw.tryGetSpan(true)
+					if s = gcw.tryGetSpan(); s == 0 {
+						// Flush the write barrier
+						// buffer; this may create
+						// more work.
+						wbBufFlush()
+						if b = gcw.tryGetObj(); b == 0 {
+							if s = gcw.tryGetSpan(); s == 0 {
+								// Try to do a root job.
+								if work.markrootNext < work.markrootJobs {
+									job := atomic.Xadd(&work.markrootNext, +1) - 1
+									if job < work.markrootJobs {
+										workFlushed += markroot(gcw, job, false)
+										continue
+									}
+								}
+								s = gcw.tryStealSpan()
+							}
+						}
 					}
 				}
 			}
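Aside: the nesting in gcDrain and gcDrainN encodes a strict cost ordering: P-local fast paths first, a write barrier flush in the middle (because flushing can create new local work), and cross-P stealing only as a last resort. A runnable sketch of that ladder with invented stand-ins (worker, pop, and the slice fields are not runtime names):

	package main

	import "fmt"

	// worker is an invented stand-in for the runtime's gcWork.
	type worker struct {
		fast    []int // stand-in for the P-local fast path
		slow    []int // stand-in for the rest of the local queues
		pending []int // stand-in for the write barrier buffer
		others  []int // stand-in for other Ps' stealable work
	}

	func pop(s *[]int) (int, bool) {
		if len(*s) == 0 {
			return 0, false
		}
		v := (*s)[len(*s)-1]
		*s = (*s)[:len(*s)-1]
		return v, true
	}

	// nextWork mirrors the ladder above: cheapest source first, then a
	// flush that may create local work, then stealing.
	func (w *worker) nextWork() (int, bool) {
		if v, ok := pop(&w.fast); ok {
			return v, true
		}
		if v, ok := pop(&w.slow); ok {
			return v, true
		}
		// wbBufFlush analogue: flushing may create more local work.
		w.slow = append(w.slow, w.pending...)
		w.pending = nil
		if v, ok := pop(&w.slow); ok {
			return v, true
		}
		return pop(&w.others) // steal: most expensive, touches other Ps
	}

	func main() {
		w := &worker{pending: []int{1}, others: []int{2}}
		for {
			v, ok := w.nextWork()
			if !ok {
				break
			}
			fmt.Println("got", v)
		}
	}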
@@ -37,10 +37,10 @@
 package runtime
 
 import (
-	"internal/cpu"
+	"internal/goarch"
 	"internal/runtime/atomic"
 	"internal/runtime/gc"
 	"internal/runtime/gc/scan"
 	"internal/runtime/sys"
 	"unsafe"
 )
@@ -259,7 +259,7 @@ func gcUsesSpanInlineMarkBits(size uintptr) bool {
 	return heapBitsInSpan(size) && size >= 16
 }
 
-// tryQueueOnSpan tries to queue p on the span it points to, if it
+// tryDeferToSpanScan tries to queue p on the span it points to, if it
 // points to a small object span (gcUsesSpanQueue size).
 func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
 	if useCheckmark {
@@ -299,6 +299,12 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
 	if q.tryAcquire() {
 		if gcw.spanq.put(makeObjPtr(base, objIndex)) {
 			if gcphase == _GCmark {
+				// This is intentionally racy; the bit set here might get
+				// stomped on by a stealing P. See the comment in tryStealSpan
+				// for an explanation as to why this is OK.
+				if !work.spanqMask.read(uint32(gcw.id)) {
+					work.spanqMask.set(gcw.id)
+				}
 				gcw.mayNeedWorker = true
 			}
 			gcw.flushedWork = true
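Aside: work.spanqMask is a per-P "published work" bitmask (the runtime's pMask is the same shape): setting P's bit advertises that it may have stealable spans, and because the bit may be stale in either direction, readers treat it only as a hint. A minimal sketch of the pattern, with invented names (workMask, newWorkMask); atomic.Uint32.Or/And require Go 1.23+:

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type workMask []atomic.Uint32

	func newWorkMask(nprocs int) workMask {
		return make(workMask, (nprocs+31)/32)
	}

	func (m workMask) read(id uint32) bool {
		return m[id/32].Load()&(1<<(id%32)) != 0
	}

	func (m workMask) set(id uint32) {
		m[id/32].Or(1 << (id % 32))
	}

	func (m workMask) clear(id uint32) {
		m[id/32].And(^uint32(1 << (id % 32)))
	}

	func main() {
		m := newWorkMask(8)
		m.set(3)
		fmt.Println(m.read(3), m.read(4)) // true false
		m.clear(3)
		fmt.Println(m.read(3)) // false
	}

Stealers scan this mask before touching another P's queue, so Ps with nothing published are skipped without any cache-line traffic on their queues.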
@@ -307,260 +313,487 @@ func tryDeferToSpanScan(p uintptr, gcw *gcWork) bool {
 	return true
 }
 
+// tryGetSpanFast attempts to get an entire span to scan.
+func (w *gcWork) tryGetSpanFast() objptr {
+	return w.spanq.tryGetFast()
+}
+
 // tryGetSpan attempts to get an entire span to scan.
-func (w *gcWork) tryGetSpan(slow bool) objptr {
-	if s := w.spanq.get(); s != 0 {
+func (w *gcWork) tryGetSpan() objptr {
+	if s := w.spanq.tryGetFast(); s != 0 {
 		return s
 	}
 
-	if slow {
-		// Check the global span queue.
-		if s := work.spanq.get(w); s != 0 {
-			return s
-		}
-
-		// Attempt to steal spans to scan from other Ps.
-		return spanQueueSteal(w)
+	// "Steal" from ourselves.
+	if s := w.spanq.steal(&w.spanq); s != 0 {
+		return s
+	}
+	// We failed to get any local work, so we're fresh out.
+	// Nobody else is going to add work for us. Clear our bit.
+	if work.spanqMask.read(uint32(w.id)) {
+		work.spanqMask.clear(w.id)
 	}
 	return 0
 }
 
-// spanQueue is a concurrent safe queue of mspans. Each mspan is represented
-// as an objptr whose spanBase is the base address of the span.
+// spanQueue is a P-local stealable span queue.
 type spanQueue struct {
-	avail atomic.Bool      // optimization to check emptiness w/o the lock
-	_     cpu.CacheLinePad // prevents false-sharing between lock and avail
-	lock  mutex
-	q     mSpanQueue
-}
+	// head, tail, and ring represent a local non-thread-safe ring buffer.
+	head, tail uint32
+	ring       [256]objptr
 
-func (q *spanQueue) empty() bool {
-	return !q.avail.Load()
-}
+	// putsSinceDrain counts the number of put calls since the last drain.
+	putsSinceDrain int
 
-func (q *spanQueue) size() int {
-	return q.q.n
-}
+	// chain contains state visible to other Ps.
+	//
+	// In particular, that means a linked chain of single-producer multi-consumer
+	// ring buffers where the single producer is this P only.
+	//
+	// This linked chain structure is based off the sync.Pool dequeue.
+	chain struct {
+		// head is the spanSPMC to put to. This is only accessed
+		// by the producer, so doesn't need to be synchronized.
+		head *spanSPMC
 
-// putBatch adds a whole batch of spans to the queue.
-func (q *spanQueue) putBatch(batch []objptr) {
-	var list mSpanQueue
-	for _, p := range batch {
-		s := spanOfUnchecked(p.spanBase())
-		s.scanIdx = p.objIndex()
-		list.push(s)
-	}
-
-	lock(&q.lock)
-	if q.q.n == 0 {
-		q.avail.Store(true)
-	}
-	q.q.takeAll(&list)
-	unlock(&q.lock)
-}
-
-// get tries to take a span off the queue.
-//
-// Returns a non-zero objptr on success. Also, moves additional
-// spans to gcw's local span queue.
-func (q *spanQueue) get(gcw *gcWork) objptr {
-	if q.empty() {
-		return 0
-	}
-	lock(&q.lock)
-	if q.q.n == 0 {
-		unlock(&q.lock)
-		return 0
-	}
-	n := q.q.n/int(gomaxprocs) + 1
-	if n > q.q.n {
-		n = q.q.n
-	}
-	if max := len(gcw.spanq.ring) / 2; n > max {
-		n = max
-	}
-	newQ := q.q.popN(n)
-	if q.q.n == 0 {
-		q.avail.Store(false)
-	}
-	unlock(&q.lock)
-
-	s := newQ.pop()
-	for newQ.n > 0 {
-		s := newQ.pop()
-		gcw.spanq.put(makeObjPtr(s.base(), s.scanIdx))
-	}
-	return makeObjPtr(s.base(), s.scanIdx)
-}
-
-// localSpanQueue is a P-local ring buffer of objptrs that represent spans.
-// Accessed without a lock.
-//
-// Multi-consumer, single-producer. The only producer is the P that owns this
-// queue, but any other P may consume from it.
-//
-// This is based on the scheduler runqueues. If making changes there, consider
-// also making them here.
-type localSpanQueue struct {
-	head atomic.Uint32
-	tail atomic.Uint32
-	ring [256]objptr
-}
-
-// put adds s to the queue. Returns true if put flushed to the global queue
-// because it was full.
-func (q *localSpanQueue) put(s objptr) (flushed bool) {
-	for {
-		h := q.head.Load() // synchronize with consumers
-		t := q.tail.Load()
-		if t-h < uint32(len(q.ring)) {
-			q.ring[t%uint32(len(q.ring))] = s
-			q.tail.Store(t + 1) // Makes the item avail for consumption.
-			return false
-		}
-		if q.putSlow(s, h, t) {
-			return true
-		}
-		// The queue is not full, now the put above must succeed.
+		// tail is the spanSPMC to steal from. This is accessed
+		// by consumers, so reads and writes must be atomic.
+		tail atomic.UnsafePointer // *spanSPMC
 	}
 }
 
-// putSlow is a helper for put to move spans to the global queue.
-// Returns true on success, false on failure (nothing moved).
-func (q *localSpanQueue) putSlow(s objptr, h, t uint32) bool {
-	var batch [len(q.ring)/2 + 1]objptr
-
-	// First, grab a batch from local queue.
-	n := t - h
-	n = n / 2
-	if n != uint32(len(q.ring)/2) {
-		throw("localSpanQueue.putSlow: queue is not full")
-	}
-	for i := uint32(0); i < n; i++ {
-		batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
-	}
-	if !q.head.CompareAndSwap(h, h+n) { // Commits consume.
+// putFast tries to put s onto the queue, but may fail if it's full.
+func (q *spanQueue) putFast(s objptr) (ok bool) {
+	if q.tail-q.head == uint32(len(q.ring)) {
 		return false
 	}
-	batch[n] = s
-
-	work.spanq.putBatch(batch[:])
+	q.ring[q.tail%uint32(len(q.ring))] = s
+	q.tail++
 	return true
 }
 
-// get attempts to take a span off the queue. Might fail if the
-// queue is empty. May be called by multiple threads, but callers
-// are better off using stealFrom to amortize the cost of stealing.
-// This method is intended for use by the owner of this queue.
-func (q *localSpanQueue) get() objptr {
-	for {
-		h := q.head.Load()
-		t := q.tail.Load()
-		if t == h {
-			return 0
-		}
-		s := q.ring[h%uint32(len(q.ring))]
-		if q.head.CompareAndSwap(h, h+1) {
-			return s
-		}
-	}
-}
+// put puts s onto the queue.
+//
+// Returns whether the caller should spin up a new worker.
+func (q *spanQueue) put(s objptr) bool {
+	// The constants below define the period of and volume of
+	// spans we spill to the spmc chain when the local queue is
+	// not full.
+	//
+	// spillPeriod must be > spillMax, otherwise that sets the
+	// effective maximum size of our local span queue. Even if
+	// we have a span ring of size N, but we flush K spans every
+	// K puts, then K becomes our effective maximum length. When
+	// spillPeriod > spillMax, then we're always spilling spans
+	// at a slower rate than we're accumulating them.
+	const (
+		// spillPeriod defines how often to check if we should
+		// spill some spans, counted in the number of calls to put.
+		spillPeriod = 64
+
+		// spillMax defines, at most, how many spans to drain with
+		// each spill.
+		spillMax = 16
+	)
+
+	if q.putFast(s) {
+		// Occasionally try to spill some work to generate parallelism.
+		q.putsSinceDrain++
+		if q.putsSinceDrain >= spillPeriod {
+			// Reset even if we don't drain, so we don't check every time.
+			q.putsSinceDrain = 0
+
+			// Try to drain some spans. Don't bother if there's very
+			// few of them or there's already spans in the spmc chain.
+			n := min((q.tail-q.head)/2, spillMax)
+			if n > 4 && q.chainEmpty() {
+				q.drain(n)
+				return true
+			}
+		}
+		return false
+	}
+
+	// We're out of space. Drain out our local spans.
+	q.drain(uint32(len(q.ring)) / 2)
+	if !q.putFast(s) {
+		throw("failed putFast after drain")
+	}
+	return true
+}
 
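Aside: put's spill heuristic trades locality for parallelism: most puts stay in the cheap local ring, and only every spillPeriod puts does the producer consider publishing at most spillMax spans, and then only if nothing is already published. A simplified model with invented names (queue, local, shared; the real code drains objptrs into a spanSPMC):

	package main

	import "fmt"

	const (
		spillPeriod = 64 // check cadence, counted in puts
		spillMax    = 16 // at most this many items per spill
	)

	type queue struct {
		local          []int
		shared         []int // stand-in for the spmc chain
		putsSinceDrain int
	}

	func (q *queue) put(v int) {
		q.local = append(q.local, v)
		q.putsSinceDrain++
		if q.putsSinceDrain < spillPeriod {
			return
		}
		q.putsSinceDrain = 0 // reset even if we don't drain
		n := min(len(q.local)/2, spillMax)
		// Only spill when we have enough to share and nothing is
		// already published: spilling exists to create parallelism,
		// not to balance load.
		if n > 4 && len(q.shared) == 0 {
			q.shared = append(q.shared, q.local[:n]...)
			q.local = q.local[n:]
		}
	}

	func main() {
		q := &queue{}
		for i := 0; i < 200; i++ {
			q.put(i)
		}
		fmt.Println(len(q.local), "local,", len(q.shared), "shared")
	}

Because spillPeriod > spillMax, spilling can never outpace accumulation, so the local ring stays the primary buffer.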
+// flush publishes all spans in the local queue to the spmc chain.
+func (q *spanQueue) flush() {
+	n := q.tail - q.head
+	if n == 0 {
+		return
+	}
+	q.drain(n)
+}
+
+// empty returns true if there's no more work on the queue.
+//
+// Not thread-safe. Must only be called by the owner of q.
+func (q *spanQueue) empty() bool {
+	// Check the local queue for work.
+	if q.tail-q.head > 0 {
+		return false
+	}
+	return q.chainEmpty()
+}
+
+// chainEmpty returns true if the spmc chain is empty.
+//
+// Thread-safe.
+func (q *spanQueue) chainEmpty() bool {
+	// Check the rest of the rings for work.
+	r := (*spanSPMC)(q.chain.tail.Load())
+	for r != nil {
+		if !r.empty() {
+			return false
+		}
+		r = (*spanSPMC)(r.prev.Load())
+	}
+	return true
+}
+
+// drain publishes n spans from the local queue to the spmc chain.
+func (q *spanQueue) drain(n uint32) {
+	q.putsSinceDrain = 0
+
+	if q.chain.head == nil {
+		// N.B. We target 1024, but this may be bigger if the physical
+		// page size is bigger, or if we can fit more uintptrs into a
+		// physical page. See newSpanSPMC docs.
+		r := newSpanSPMC(1024)
+		q.chain.head = r
+		q.chain.tail.StoreNoWB(unsafe.Pointer(r))
+	}
+
+	// Try to drain some of the queue to the head spmc.
+	if q.tryDrain(q.chain.head, n) {
+		return
+	}
+	// No space. Create a bigger spmc and add it to the chain.
+
+	// Double the size of the next one, up to a maximum.
+	//
+	// We double each time so we can avoid taking this slow path
+	// in the future, which involves a global lock. Ideally we want
+	// to hit a steady-state where the deepest any queue goes during
+	// a mark phase can fit in the ring.
+	//
+	// However, we still set a maximum on this. We set the maximum
+	// to something large to amortize the cost of lock acquisition, but
+	// still at a reasonable size for big heaps and/or a lot of Ps (which
+	// tend to be correlated).
+	//
+	// It's not too bad to burn relatively large-but-fixed amounts of per-P
+	// memory if we need to deal with really, really deep queues, since the
+	// constants of proportionality are small. Simultaneously, we want to
+	// avoid a situation where a single worker ends up queuing O(heap)
+	// work and then forever retains a queue of that size.
+	const maxCap = 1 << 20 / goarch.PtrSize
+	newCap := q.chain.head.cap * 2
+	if newCap > maxCap {
+		newCap = maxCap
+	}
+	newHead := newSpanSPMC(newCap)
+	if !q.tryDrain(newHead, n) {
+		throw("failed to put span on newly-allocated spanSPMC")
+	}
+	q.chain.head.prev.StoreNoWB(unsafe.Pointer(newHead))
+	q.chain.head = newHead
+}
+
+// tryDrain attempts to drain n spans from q's local queue to the chain.
+//
+// Returns whether it succeeded.
+func (q *spanQueue) tryDrain(r *spanSPMC, n uint32) bool {
+	if q.head+n > q.tail {
+		throw("attempt to drain too many elements")
+	}
+	h := r.head.Load() // synchronize with consumers
+	t := r.tail.Load()
+	rn := t - h
+	if rn+n <= r.cap {
+		for i := uint32(0); i < n; i++ {
+			*r.slot(t + i) = q.ring[(q.head+i)%uint32(len(q.ring))]
+		}
+		r.tail.Store(t + n) // Makes the items avail for consumption.
+		q.head += n
+		return true
+	}
+	return false
+}
+
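Aside: the chain-growth policy in drain is the classic doubling trick: each new ring is twice the previous capacity (up to a cap), so the lock-protected slow path is taken O(log n) times for n queued spans. A toy version with invented names (ring, chain, push; the runtime's prev link and constants are as in the diff above, but everything else here is simplified):

	package main

	import "fmt"

	const maxCap = 1 << 10 // illustration only; the runtime uses 1<<20/goarch.PtrSize

	// ring is an invented stand-in for spanSPMC: a fixed-capacity buffer
	// linked into a chain. prev points one step from the steal end (tail)
	// toward the put end (head).
	type ring struct {
		buf  []int
		cap  int
		prev *ring
	}

	type chain struct {
		head *ring // the producer puts here
		tail *ring // consumers steal here
	}

	func (c *chain) push(v int) {
		if c.head == nil {
			r := &ring{cap: 8}
			c.head, c.tail = r, r
		}
		if len(c.head.buf) == c.head.cap { // full: grow by doubling, up to maxCap
			newCap := c.head.cap * 2
			if newCap > maxCap {
				newCap = maxCap
			}
			r := &ring{cap: newCap}
			c.head.prev = r // old head now points "up" to the new head
			c.head = r
		}
		c.head.buf = append(c.head.buf, v)
	}

	func main() {
		var c chain
		for i := 0; i < 100; i++ {
			c.push(i)
		}
		for r, n := c.tail, 0; r != nil; r, n = r.prev, n+1 {
			fmt.Printf("ring %d: cap %d, len %d\n", n, r.cap, len(r.buf))
		}
	}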
+// tryGetFast attempts to get a span from the local queue, but may fail if it's empty,
+// returning 0.
+func (q *spanQueue) tryGetFast() objptr {
+	if q.tail-q.head == 0 {
+		return 0
+	}
+	s := q.ring[q.head%uint32(len(q.ring))]
+	q.head++
+	return s
+}
+
+// steal takes some spans from the ring chain of another span queue.
+//
+// q == q2 is OK.
+func (q *spanQueue) steal(q2 *spanQueue) objptr {
+	r := (*spanSPMC)(q2.chain.tail.Load())
+	if r == nil {
+		return 0
+	}
+	for {
+		// It's important that we load the next pointer
+		// *before* popping the tail. In general, r may be
+		// transiently empty, but if next is non-nil before
+		// the pop and the pop fails, then r is permanently
+		// empty, which is the only condition under which it's
+		// safe to drop r from the chain.
+		r2 := (*spanSPMC)(r.prev.Load())
+
+		// Try to refill from one of the rings
+		if s := q.refill(r); s != 0 {
+			return s
+		}
+
+		if r2 == nil {
+			// This is the only ring. It's empty right
+			// now, but could be pushed to in the future.
+			return 0
+		}
+
+		// The tail of the chain has been drained, so move on
+		// to the next ring. Try to drop it from the chain
+		// so the next consumer doesn't have to look at the empty
+		// ring again.
+		if q2.chain.tail.CompareAndSwapNoWB(unsafe.Pointer(r), unsafe.Pointer(r2)) {
+			r.dead.Store(true)
+		}
+
+		r = r2
+	}
+}
+
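Aside: the "load prev before pop" ordering in steal is what makes dropping a ring safe: only the producer appends rings, and only at the head, so if prev was non-nil before an empty pop, the tail ring can never be refilled. A single-goroutine sketch of that walk using atomic.Pointer (all names invented; the real pop is the SPMC refill and runs concurrently):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type ring struct {
		items []int // stand-in for the SPMC ring buffer
		prev  atomic.Pointer[ring]
		dead  atomic.Bool
	}

	type chain struct {
		tail atomic.Pointer[ring]
	}

	func (c *chain) steal() (int, bool) {
		r := c.tail.Load()
		for r != nil {
			// Load the next link *before* trying to pop, so that an
			// empty pop proves r is permanently empty (nothing is
			// ever appended behind the tail).
			r2 := r.prev.Load()
			if n := len(r.items); n > 0 {
				v := r.items[n-1]
				r.items = r.items[:n-1]
				return v, true
			}
			if r2 == nil {
				return 0, false // only ring; may be refilled later
			}
			// Drop the drained tail so later consumers skip it.
			if c.tail.CompareAndSwap(r, r2) {
				r.dead.Store(true) // reclaimed later, outside the mark phase
			}
			r = r2
		}
		return 0, false
	}

	func main() {
		a := &ring{}                // drained tail
		b := &ring{items: []int{7}} // next ring up the chain
		a.prev.Store(b)
		var c chain
		c.tail.Store(a)
		fmt.Println(c.steal())     // 7 true; tail now points at b
		fmt.Println(a.dead.Load()) // true
	}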
-func (q *localSpanQueue) empty() bool {
-	h := q.head.Load()
-	t := q.tail.Load()
-	return t == h
-}
-
-// stealFrom takes spans from q2 and puts them into q1. One span is removed
-// from the stolen spans and returned on success. Failure to steal returns a
-// zero objptr.
-func (q1 *localSpanQueue) stealFrom(q2 *localSpanQueue) objptr {
-	writeHead := q1.tail.Load()
+// refill takes some spans from r and puts them into q's local queue.
+//
+// One span is removed from the stolen spans and returned on success.
+// Failure to steal returns a zero objptr.
+//
+// steal is thread-safe with respect to r.
+func (q *spanQueue) refill(r *spanSPMC) objptr {
+	if q.tail-q.head != 0 {
+		throw("steal with local work available")
+	}
 
+	// Steal some spans.
 	var n uint32
 	for {
-		h := q2.head.Load() // load-acquire, synchronize with other consumers
-		t := q2.tail.Load() // load-acquire, synchronize with the producer
+		h := r.head.Load() // load-acquire, synchronize with other consumers
+		t := r.tail.Load() // load-acquire, synchronize with the producer
 		n = t - h
 		n = n - n/2
 		if n == 0 {
 			return 0
 		}
-		if n > uint32(len(q2.ring)/2) { // read inconsistent h and t
+		if n > r.cap { // read inconsistent h and t
 			continue
 		}
+		n = min(n, uint32(len(q.ring)/2))
 		for i := uint32(0); i < n; i++ {
-			c := q2.ring[(h+i)%uint32(len(q2.ring))]
-			q1.ring[(writeHead+i)%uint32(len(q1.ring))] = c
+			q.ring[i] = *r.slot(h + i)
 		}
-		if q2.head.CompareAndSwap(h, h+n) {
+		if r.head.CompareAndSwap(h, h+n) {
 			break
 		}
 	}
-	n--
-	c := q1.ring[(writeHead+n)%uint32(len(q1.ring))]
-	if n == 0 {
-		return c
-	}
-	h := q1.head.Load()
-	if writeHead-h+n >= uint32(len(q1.ring)) {
-		throw("localSpanQueue.stealFrom: queue overflow")
-	}
-	q1.tail.Store(writeHead + n)
-	return c
+
+	// Update local queue head and tail to reflect new buffered values.
+	q.head = 0
+	q.tail = n
+
+	// Pop off the head of the queue and return it.
+	return q.tryGetFast()
 }
 
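Aside: refill (like the old stealFrom) is the steal-half idiom from the scheduler runqueues: copy roughly half of the visible items first, then commit the consumption with a single CAS on head, retrying from fresh indices if another consumer won. A minimal standalone model (invented names; the runtime copies into a P-local ring rather than allocating):

	package main

	import (
		"fmt"
		"sync/atomic"
	)

	type spmc struct {
		head, tail atomic.Uint32 // head: consume index; tail: produce index
		ring       [8]int
	}

	// stealHalf takes about half of the available items. The copy happens
	// before the CAS; the CAS on head is what commits the consumption, so
	// a lost race just means retrying with fresh indices.
	func (q *spmc) stealHalf() []int {
		for {
			h := q.head.Load()
			t := q.tail.Load()
			n := (t - h) - (t-h)/2 // ceil(available / 2)
			if n == 0 {
				return nil
			}
			if n > uint32(len(q.ring)) { // inconsistent h and t; retry
				continue
			}
			out := make([]int, n)
			for i := uint32(0); i < n; i++ {
				out[i] = q.ring[(h+i)%uint32(len(q.ring))]
			}
			if q.head.CompareAndSwap(h, h+n) { // commit
				return out
			}
		}
	}

	func main() {
		var q spmc
		for i := 0; i < 6; i++ {
			q.ring[i] = i + 1
		}
		q.tail.Store(6)
		fmt.Println(q.stealHalf()) // [1 2 3]
	}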
-// drain moves all spans in the queue to the global queue.
-//
-// Returns true if anything was moved.
-func (q *localSpanQueue) drain() bool {
-	var batch [len(q.ring)]objptr
-
-	var n uint32
-	for {
-		var h uint32
-		for {
-			h = q.head.Load()
-			t := q.tail.Load()
-			n = t - h
-			if n == 0 {
-				return false
-			}
-			if n <= uint32(len(q.ring)) {
-				break
-			}
-			// Read inconsistent h and t.
-		}
-		for i := uint32(0); i < n; i++ {
-			batch[i] = q.ring[(h+i)%uint32(len(q.ring))]
-		}
-		if q.head.CompareAndSwap(h, h+n) { // Commits consume.
-			break
-		}
-	}
-	if !q.empty() {
-		throw("drained local span queue, but not empty")
-	}
-
-	work.spanq.putBatch(batch[:n])
-	return true
-}
+// spanSPMC is a ring buffer of objptrs that represent spans.
+// Accessed without a lock.
+//
+// Single-producer, multi-consumer. The only producer is the P that owns this
+// queue, but any other P may consume from it.
+//
+// ## Invariants for memory management
+//
+// 1. All spanSPMCs are allocated from mheap_.spanSPMCAlloc.
+// 2. All allocated spanSPMCs must be on the work.spanSPMCs list.
+// 3. spanSPMCs may only be allocated if gcphase != _GCoff.
+// 4. spanSPMCs may only be deallocated if gcphase == _GCoff.
+//
+// Invariants (3) and (4) ensure that we do not need to concern ourselves with
+// tricky reuse issues that stem from not knowing when a thread is truly done
+// with a spanSPMC. For example, two threads could load the same spanSPMC from
+// the tail of the chain. One thread is then paused while the other steals the
+// last few elements off of it. It's not safe to free at that point since the
+// other thread will still inspect that spanSPMC, and we have no way of knowing
+// without more complex and/or heavyweight synchronization.
+//
+// Instead, we rely on the global synchronization inherent to GC phases, and
+// the fact that spanSPMCs are only ever used during the mark phase, to ensure
+// memory safety. This means we temporarily waste some memory, but it's only
+// until the end of the mark phase.
+type spanSPMC struct {
+	_ sys.NotInHeap
+
+	// allnext is the link to the next spanSPMC on the work.spanSPMCs list.
+	// This is used to find and free dead spanSPMCs. Protected by
+	// work.spanSPMCs.lock.
+	allnext *spanSPMC
+
+	// dead indicates whether the spanSPMC is no longer in use.
+	// Protected by the CAS to the prev field of the spanSPMC pointing
+	// to this spanSPMC. That is, whoever wins that CAS takes ownership
+	// of marking this spanSPMC as dead. See spanQueue.steal for details.
+	dead atomic.Bool
+
+	// prev is the next link up a spanQueue's SPMC chain, from tail to head,
+	// hence the name "prev." Set by a spanQueue's producer, cleared by a
+	// CAS in spanQueue.steal.
+	prev atomic.UnsafePointer // *spanSPMC
+
+	// head, tail, cap, and ring together represent a fixed-size SPMC lock-free
+	// ring buffer of size cap. The ring buffer contains objptr values.
+	head atomic.Uint32
+	tail atomic.Uint32
+	cap  uint32 // cap(ring)
+	ring *objptr
+}
 
-// spanQueueSteal attempts to steal a span from another P's local queue.
+// newSpanSPMC allocates and initializes a new spmc with the provided capacity.
+//
+// newSpanSPMC may override the capacity with a larger one if the provided one would
+// waste memory.
+func newSpanSPMC(cap uint32) *spanSPMC {
+	lock(&work.spanSPMCs.lock)
+	r := (*spanSPMC)(mheap_.spanSPMCAlloc.alloc())
+	r.allnext = work.spanSPMCs.all
+	work.spanSPMCs.all = r
+	unlock(&work.spanSPMCs.lock)
+
+	// If cap < the capacity of a single physical page, round up.
+	pageCap := uint32(physPageSize / goarch.PtrSize) // capacity of a single page
+	if cap < pageCap {
+		cap = pageCap
+	}
+	if cap&(cap-1) != 0 {
+		throw("spmc capacity must be a power of 2")
+	}
+
+	r.cap = cap
+	ring := sysAlloc(uintptr(cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys, "GC span queue")
+	atomic.StorepNoWB(unsafe.Pointer(&r.ring), ring)
+	return r
+}
+
+// empty returns true if the spmc is empty.
+//
+// empty is thread-safe.
+func (r *spanSPMC) empty() bool {
+	h := r.head.Load()
+	t := r.tail.Load()
+	return t == h
+}
+
+// deinit frees any resources the spanSPMC is holding onto and zeroes it.
+func (r *spanSPMC) deinit() {
+	sysFree(unsafe.Pointer(r.ring), uintptr(r.cap)*unsafe.Sizeof(objptr(0)), &memstats.gcMiscSys)
+	r.ring = nil
+	r.dead.Store(false)
+	r.prev.StoreNoWB(nil)
+	r.head.Store(0)
+	r.tail.Store(0)
+	r.cap = 0
+}
+
+// slot returns a pointer to slot i%r.cap.
+func (r *spanSPMC) slot(i uint32) *objptr {
+	idx := uintptr(i & (r.cap - 1))
+	return (*objptr)(unsafe.Add(unsafe.Pointer(r.ring), idx*unsafe.Sizeof(objptr(0))))
+}
 
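Aside: this is why newSpanSPMC insists on a power-of-two capacity: slot can then reduce an index with i & (cap-1) instead of a modulo, and head/tail can be free-running counters whose difference is always the queue length, even across unsigned wraparound. A safe, runnable demonstration of the same indexing scheme (invented names throughout):

	package main

	import "fmt"

	const capacity = 8 // must be a power of two

	type ringBuf struct {
		head, tail uint32 // free-running counters; masked only at access time
		buf        [capacity]int
	}

	func (r *ringBuf) push(v int) bool {
		if r.tail-r.head == capacity {
			return false // full
		}
		r.buf[r.tail&(capacity-1)] = v // same as r.tail%capacity
		r.tail++
		return true
	}

	func (r *ringBuf) pop() (int, bool) {
		if r.tail == r.head {
			return 0, false // empty
		}
		v := r.buf[r.head&(capacity-1)]
		r.head++
		return v, true
	}

	func main() {
		var r ringBuf
		// The counters only ever increase; masking handles the wrap.
		for i := 0; i < 1000; i++ {
			r.push(i)
			if v, _ := r.pop(); v != i {
				panic("mismatch")
			}
		}
		fmt.Println("ok; head =", r.head)
	}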
+// freeSomeSpanSPMCs frees some spanSPMCs back to the OS and returns
+// true if it should be called again to free more.
+func freeSomeSpanSPMCs(preemptible bool) bool {
+	// TODO(mknyszek): This is arbitrary, but some kind of limit is necessary
+	// to help bound delays to cooperatively preempt ourselves.
+	const batchSize = 64
+
+	// According to the SPMC memory management invariants, we can only free
+	// spanSPMCs outside of the mark phase. We ensure we do this in two ways.
+	//
+	// 1. We take the work.spanSPMCs lock, which we need anyway. This ensures
+	//    that we are non-preemptible. If this path becomes lock-free, we will
+	//    need to become non-preemptible in some other way.
+	// 2. Once we are non-preemptible, we check the gcphase, and back out if
+	//    it's not safe.
+	//
+	// This way, we ensure that we don't start freeing if we're in the wrong
+	// phase, and the phase can't change on us while we're freeing.
+	lock(&work.spanSPMCs.lock)
+	if gcphase != _GCoff || work.spanSPMCs.all == nil {
+		unlock(&work.spanSPMCs.lock)
+		return false
+	}
+	rp := &work.spanSPMCs.all
+	gp := getg()
+	more := true
+	for i := 0; i < batchSize && !(preemptible && gp.preempt); i++ {
+		r := *rp
+		if r == nil {
+			more = false
+			break
+		}
+		if r.dead.Load() {
+			// It's dead. Deinitialize and free it.
+			*rp = r.allnext
+			r.deinit()
+			mheap_.spanSPMCAlloc.free(unsafe.Pointer(r))
+		} else {
+			// Still alive, likely in some P's chain.
+			// Skip it.
+			rp = &r.allnext
+		}
+	}
+	unlock(&work.spanSPMCs.lock)
+	return more
+}
 
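Aside: the *rp loop in freeSomeSpanSPMCs uses a pointer-to-pointer cursor so that unlinking a node needs no separate "previous node" bookkeeping. A minimal standalone version of that idiom (invented names):

	package main

	import "fmt"

	type node struct {
		val  int
		dead bool
		next *node
	}

	// sweep removes every dead node from the list rooted at *head.
	func sweep(head **node) {
		rp := head
		for *rp != nil {
			n := *rp
			if n.dead {
				*rp = n.next // unlink; rp itself does not advance
			} else {
				rp = &n.next // keep node; advance the cursor
			}
		}
	}

	func main() {
		// Build 1 -> 2 -> 3 -> 4 with 2 and 3 dead.
		l := &node{val: 1, next: &node{val: 2, dead: true,
			next: &node{val: 3, dead: true, next: &node{val: 4}}}}
		sweep(&l)
		for n := l; n != nil; n = n.next {
			fmt.Println(n.val) // prints 1, then 4
		}
	}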
+// tryStealSpan attempts to steal a span from another P's local queue.
 //
 // Returns a non-zero objptr on success.
-func spanQueueSteal(gcw *gcWork) objptr {
+func (w *gcWork) tryStealSpan() objptr {
 	pp := getg().m.p.ptr()
 
 	for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
+		if !work.spanqMask.read(enum.position()) {
+			continue
+		}
 		p2 := allp[enum.position()]
 		if pp == p2 {
 			continue
 		}
-		if s := gcw.spanq.stealFrom(&p2.gcw.spanq); s != 0 {
+		if s := w.spanq.steal(&p2.gcw.spanq); s != 0 {
 			return s
 		}
+		// N.B. This is intentionally racy. We may stomp on a mask set by
+		// a P that just put a bunch of work into its local queue.
+		//
+		// This is OK because the ragged barrier in gcMarkDone will set
+		// the bit on each P if there's local work we missed. This race
+		// should generally be rare, since the window between noticing
+		// an empty local queue and this bit being set is quite small.
+		work.spanqMask.clear(int32(enum.position()))
 	}
 	return 0
 }
@@ -608,8 +841,7 @@ func scanSpan(p objptr, gcw *gcWork) {
 		atomic.Or8(bytep, mask)
 		gcw.bytesMarked += uint64(elemsize)
 		if debug.gctrace > 1 {
-			gcw.stats[spanclass.sizeclass()].spansSparseScanned++
-			gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned++
+			gcw.stats[spanclass.sizeclass()].sparseObjsScanned++
 		}
 		b := spanBase + uintptr(objIndex)*elemsize
 		scanObjectSmall(spanBase, b, elemsize, gcw)
@@ -631,11 +863,47 @@ func scanSpan(p objptr, gcw *gcWork) {
 		return
 	}
 	gcw.bytesMarked += uint64(objsMarked) * uint64(elemsize)
+
+	// Check if we have enough density to make a dartboard scan
+	// worthwhile. If not, just do what scanobject does, but
+	// localized to the span, using the dartboard.
+	if !scan.HasFastScanSpanPacked() || objsMarked < int(nelems/8) {
+		if debug.gctrace > 1 {
+			gcw.stats[spanclass.sizeclass()].spansSparseScanned++
+			gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned += uint64(objsMarked)
+		}
+		scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
+		return
+	}
+
+	// Scan the span.
+	//
+	// N.B. Use gcw.ptrBuf as the output buffer. This is a bit different
+	// from scanObjectsSmall, which puts addresses to dereference. ScanSpanPacked
+	// on the other hand, fills gcw.ptrBuf with already dereferenced pointers.
+	nptrs := scan.ScanSpanPacked(
+		unsafe.Pointer(spanBase),
+		&gcw.ptrBuf[0],
+		&toScan,
+		uintptr(spanclass.sizeclass()),
+		spanPtrMaskUnsafe(spanBase),
+	)
+	gcw.heapScanWork += int64(objsMarked) * int64(elemsize)
+
 	if debug.gctrace > 1 {
-		gcw.stats[spanclass.sizeclass()].spansSparseScanned++
-		gcw.stats[spanclass.sizeclass()].spanObjsSparseScanned += uint64(objsMarked)
+		// Write down some statistics.
+		gcw.stats[spanclass.sizeclass()].spansDenseScanned++
+		gcw.stats[spanclass.sizeclass()].spanObjsDenseScanned += uint64(objsMarked)
 	}
-	scanObjectsSmall(spanBase, elemsize, nelems, gcw, &toScan)
+
+	// Process all the pointers we just got.
+	for _, p := range gcw.ptrBuf[:nptrs] {
+		if !tryDeferToSpanScan(p, gcw) {
+			if obj, span, objIndex := findObject(p, 0, 0); obj != 0 {
+				greyobject(obj, 0, 0, span, gcw, objIndex)
+			}
+		}
+	}
 }
 
 // spanSetScans sets any unset mark bits that have their mark bits set in the inline mark bits.
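Aside: the objsMarked < nelems/8 test above is a density cutoff: the packed whole-span kernel only pays off when at least roughly one in eight of the span's objects is marked; below that, visiting marked objects individually is cheaper. A tiny standalone version of the dispatch (invented names; the real threshold check and scanners live in internal/runtime/gc/scan):

	package main

	import "fmt"

	func scanSpanSketch(marked, nelems int, haveFastPath bool) string {
		if !haveFastPath || marked < nelems/8 {
			return "sparse: visit marked objects one by one"
		}
		return "dense: scan the whole span with the packed kernel"
	}

	func main() {
		fmt.Println(scanSpanSketch(3, 128, true))  // sparse
		fmt.Println(scanSpanSketch(64, 128, true)) // dense
	}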
@@ -798,12 +1066,27 @@ func heapBitsSmallForAddrInline(spanBase, addr, elemsize uintptr) uintptr {
 	return read
 }
 
+// spanPtrMaskUnsafe returns the pointer mask for a span with inline mark bits.
+//
+// The caller must ensure spanBase is the base of a span that:
+//   - 1 page in size,
+//   - Uses inline mark bits,
+//   - Contains pointers.
+func spanPtrMaskUnsafe(spanBase uintptr) *gc.PtrMask {
+	base := spanBase + gc.PageSize - unsafe.Sizeof(gc.PtrMask{}) - unsafe.Sizeof(spanInlineMarkBits{})
+	return (*gc.PtrMask)(unsafe.Pointer(base))
+}
+
 type sizeClassScanStats struct {
 type sizeClassScanStats struct {
-	spansDenseScanned     uint64
-	spanObjsDenseScanned  uint64
-	spansSparseScanned    uint64
-	spanObjsSparseScanned uint64
-	sparseObjsScanned     uint64
+	spansDenseScanned     uint64 // Spans scanned with ScanSpanPacked.
+	spanObjsDenseScanned  uint64 // Objects scanned with ScanSpanPacked.
+	spansSparseScanned    uint64 // Spans scanned with scanObjectsSmall.
+	spanObjsSparseScanned uint64 // Objects scanned with scanObjectsSmall.
+	sparseObjsScanned     uint64 // Objects scanned with scanobject or scanObjectSmall.
+	// Note: sparseObjsScanned is sufficient for both cases because
+	// a particular size class either uses scanobject or scanObjectSmall,
+	// not both. In the latter case, we also know that there was one
+	// object scanned per span, so no need for a span counter.
 }
 
 func dumpScanStats() {
@@ -852,6 +1135,23 @@ func (w *gcWork) flushScanStats(dst *[gc.NumSizeClasses]sizeClassScanStats) {
 	clear(w.stats[:])
 }
 
+// gcMarkWorkAvailable reports whether there's any non-local work available to do.
+//
+// This is a heavyweight check and must only be used for correctness, not
+// as a hint.
+func gcMarkWorkAvailable() bool {
+	if !work.full.empty() {
+		return true // global work available
+	}
+	if work.markrootNext < work.markrootJobs {
+		return true // root scan work available
+	}
+	if work.spanqMask.any() {
+		return true // stealable local work available
+	}
+	return false
+}
+
 // scanObject scans the object starting at b, adding pointers to gcw.
 // b must point to the beginning of a heap object or an oblet.
 // scanObject consults the GC bitmap for the pointer mask and the
Some files were not shown because too many files have changed in this diff.