[dev.simd] all: merge master (924fe98) into dev.simd

Conflicts:

- src/cmd/compile/internal/amd64/ssa.go
- src/cmd/compile/internal/ssa/expand_calls.go
- src/cmd/compile/internal/ssagen/ssa.go
- src/internal/buildcfg/exp.go
- src/internal/cpu/cpu.go
- src/internal/cpu/cpu_x86.go
- src/runtime/mkpreempt.go
- src/runtime/preempt_amd64.go
- src/runtime/preempt_amd64.s

Merge List:

+ 2025-08-14 924fe98902 cmd/internal/obj/riscv: add encoding for compressed riscv64 instructions
+ 2025-08-13 320df537cc cmd/compile: emit classify instructions for infinity tests on riscv64
+ 2025-08-13 ca66f907dd cmd/compile: use generated loops instead of DUFFCOPY on amd64
+ 2025-08-13 4b1800e476 encoding/json/v2: cleanup error constructors
+ 2025-08-13 af8870708b encoding/json/v2: fix incorrect marshaling of NaN in float64 any
+ 2025-08-13 0a75e5a07b encoding/json/v2: fix wrong type with cyclic marshal error in map[string]any
+ 2025-08-13 de9b6f9875 cmd/pprof: update vendored github.com/google/pprof
+ 2025-08-13 674c5f0edd os/exec: fix incorrect expansion of ".." in LookPath on plan9
+ 2025-08-13 9bbea0f21a cmd/compile: during regalloc, fixedreg values are always available
+ 2025-08-13 08eef97500 runtime/trace: fix documentation typo
+ 2025-08-13 2fe5d51d04 internal/trace: fix wrong scope for Event.Range or EvGCSweepActive
+ 2025-08-13 9fcb87c352 cmd/compile: teach prove about len's & cap's max based on the element size
+ 2025-08-13 9763ece873 cmd/compile: absorb NEGV into branch on loong64
+ 2025-08-13 f10a82b76f all: update vendored dependencies [generated]
+ 2025-08-13 3bea95b277 cmd/link/internal/ld: remove OpenBSD buildid workaround
+ 2025-08-12 90b7d7aaa2 cmd/compile/internal: optimize multiplication use new operation 'ADDshiftLLV' on loong64
+ 2025-08-12 1b263fc604 runtime/race: restore previous version of LLVM TSAN on macOS
+ 2025-08-12 b266318cf7 cmd/compile/internal/ssa: use BEQ/BNE to optimize the combination of XOR and EQ/NE on loong64
+ 2025-08-12 adbf59525c internal/runtime/gc/scan: avoid -1 index when cache sizes unavailable
+ 2025-08-12 4e182db5fc Revert "cmd/compile: use generated loops instead of DUFFCOPY on amd64"
+ 2025-08-12 d2b3c1a504 internal/trace: clarify which StateTransition events have stacks
+ 2025-08-12 f63e12d0e0 internal/trace: fix Sync.ClockSnapshot comment
+ 2025-08-12 8e317da77d internal/trace: remove unused StateTransition.id field
+ 2025-08-12 f67d8ff34a internal/trace/tracev2: adjust comment for consistency
+ 2025-08-12 fe4d445c36 internal/trace/tracev2: fix EvSTWBegin comment to include stack ID
+ 2025-08-12 750789fab7 internal/trace/internal/testgen: fix missing stacks nframes arg
+ 2025-08-12 889ab74169 internal/runtime/gc/scan: import scan kernel from gclab [green tea]
+ 2025-08-12 182336bf05 net/http: fix data race in client
+ 2025-08-12 f04421ea9a cmd/compile: soften test for 74788
+ 2025-08-12 28aa529c99 cmd/compile: use generated loops instead of DUFFZERO on arm64
+ 2025-08-12 ec9e1176c3 cmd/compile: use generated loops instead of DUFFCOPY on amd64
+ 2025-08-12 d0a64f7969 Revert "cmd/compile/internal/ssa: Use transitive properties for len/cap"
+ 2025-08-12 00a7bdcb55 all: delete aliastypeparams GOEXPERIMENT
+ 2025-08-11 74421a305b Revert "cmd/compile: allow multi-field structs to be stored directly in interfaces"
+ 2025-08-11 c31359138c Revert "cmd/compile: allow StructSelect [x] of interface data fields for x>0"
+ 2025-08-11 7248995b60 Revert "cmd/compile: allow more args in StructMake folding rule"
+ 2025-08-11 caf9fc3ccd Revert "reflect: handle zero-sized fields of directly-stored structures correctly"
+ 2025-08-11 ce3f3e2ae7 cmd/link/internal/ld, internal/syscall/unix: use posix_fallocate on netbsd
+ 2025-08-11 3dbef65bf3 database/sql: allow drivers to override Scan behavior
+ 2025-08-11 2b804abf07 net: context aware Dialer.Dial functions
+ 2025-08-11 6abfe7b0de cmd/dist: require Go 1.24.6 as minimum bootstrap toolchain
+ 2025-08-11 691af6ca28 encoding/json: fix Indent trailing whitespace regression in goexperiment.jsonv2
+ 2025-08-11 925149da20 net/http: add example for CrossOriginProtection
+ 2025-08-11 cf4af0b2f3 encoding/json/v2: fix UnmarshalDecode regression with EOF
+ 2025-08-11 b096ddb9ea internal/runtime/maps: loop invariant code motion with h2(hash) by hand
+ 2025-08-11 a2431776eb net, os, file/filepath, syscall: use slices.Equal in tests
+ 2025-08-11 a7f05b38f7 cmd/compile: convert branch with zero to more optimal branch zero on loong64
+ 2025-08-11 1718828c81 internal/sync: warn about incorrect unsafe usage in HashTrieMap
+ 2025-08-11 084c0f8494 cmd/compile: allow InlMark operations to be speculatively executed
+ 2025-08-10 a62f72f7a7 cmd/compile/internal/ssa: optimise more branches with SGTconst/SGTUconst on loong64
+ 2025-08-08 fbac94a799 internal/sync: rename Store parameter from old to new
+ 2025-08-08 317be4cfeb cmd/compile/internal/staticinit: remove deadcode
+ 2025-08-08 bce5601cbb cmd/go: fix fips doc link
+ 2025-08-08 777d76c4f2 text/template: use sync.OnceValue for builtinFuncs
+ 2025-08-08 0201524c52 math: remove redundant infinity tests
+ 2025-08-08 dcc77f9e3c cmd/go: fix get -tool when multiple packages are provided
+ 2025-08-08 c7b85e9ddc all: update blog link
+ 2025-08-08 a8dd771e13 crypto/tls: check if quic conn can send session ticket
+ 2025-08-08 bdb2d50fdf net: fix WriteMsgUDPAddrPort addr handling on IPv4 sockets
+ 2025-08-08 768c51e368 internal/runtime/maps: remove unused var bitsetDeleted
+ 2025-08-08 b3388569a1 reflect: handle zero-sized fields of directly-stored structures correctly
+ 2025-08-08 d83b16fcb8 internal/bytealg: vector implementation of compare for riscv64
+ 2025-08-07 dd3abf6bc5 internal/bytealg: optimize Index/IndexString on loong64
+ 2025-08-07 73ff6d1480 cmd/internal/obj/loong64: change the immediate range of ALSL{W/WU/V}
+ 2025-08-07 f3606b0825 cmd/compile/internal/ssa: fix typo in LOONG64Ops.go comment
+ 2025-08-07 ee7bb8969a cmd/internal/obj/loong64: add support for FSEL instruction
+ 2025-08-07 1f7ffca171 time: skip TestLongAdjustTimers on plan9 (too slow)
+ 2025-08-06 8282b72d62 runtime/race: update darwin race syso
+ 2025-08-06 dc54d7b607 all: remove support for windows/arm
+ 2025-08-06 e0a1ea431c cmd/compile: make panicBounds stack frame smaller on ppc64
+ 2025-08-06 2747f925dd debug/macho: support reading imported symbols without LC_DYSYMTAB
+ 2025-08-06 025d36917c cmd/internal/testdir: pass -buildid to link command
+ 2025-08-06 f53dcb6280 cmd/internal/testdir: unify link command
+ 2025-08-06 a3895fe9f1 database/sql: avoid closing Rows while scan is in progress
+ 2025-08-06 608e9fac90 go/types, types2: flip on position tracing
+ 2025-08-06 72e8237cc1 cmd/compile: allow more args in StructMake folding rule
+ 2025-08-06 3406a617d9 internal/bytealg: vector implementation of indexbyte for riscv64
+ 2025-08-06 75ea2d05c0 internal/bytealg: vector implementation of equal for riscv64
+ 2025-08-05 17a8be7117 crypto/sha512: use const table for key loading on loong64
+ 2025-08-05 dda9d780e2 crypto/sha256: use const table for key loading on loong64
+ 2025-08-05 5defe8ebb3 internal/chacha8rand: replace WORD with instruction VMOVQ
+ 2025-08-05 4c7362e41c cmd/internal/obj/loong64: add new instructions ALSL{W/WU/V} for loong64
+ 2025-08-05 a552737418 cmd/compile: fold negation into multiplication on loong64
+ 2025-08-05 e1fd4faf91 runtime: fix godoc comment for inVDSOPage
+ 2025-08-05 bcd25c79aa cmd/compile: allow StructSelect [x] of interface data fields for x>0
+ 2025-08-05 b0945a54b5 cmd/dist, internal/platform: mark freebsd/riscv64 broken
+ 2025-08-05 55d961b202 runtime: save AVX2 and AVX-512 state on asynchronous preemption
+ 2025-08-05 af0c4fe2ca runtime: save scalar registers off stack in amd64 async preemption
+ 2025-08-05 e73afaae69 internal/cpu: add AVX-512-CD and DQ, and derived "basic AVX-512"
+ 2025-08-05 cef381ba60 runtime: eliminate global state in mkpreempt.go
+ 2025-08-05 c0025d5e0b go/parser: correct comment in expectedErrors
+ 2025-08-05 4ee0df8c46 cmd: remove dead code
+ 2025-08-05 a2c45f0eb1 runtime: test VDSO symbol hash values
+ 2025-08-05 cd55f86b8d cmd/compile: allow multi-field structs to be stored directly in interfaces
+ 2025-08-05 21ab0128b6 cmd/compile: remove support for old-style bounds check calls
+ 2025-08-05 802d056c78 cmd/compile: move ppc64 over to new bounds check strategy
+ 2025-08-05 a3295df873 cmd/compile/internal/ssa: Use transitive properties for len/cap
+ 2025-08-05 bd082857a5 doc: fix typo in go memory model doc
+ 2025-08-05 2b622b05a9 cmd/compile: remove isUintXPowerOfTwo functions
+ 2025-08-05 72147ffa75 cmd/compile: simplify isUintXPowerOfTwo implementation
+ 2025-08-05 26da1199eb cmd/compile: make isUint{32,64}PowerOfTwo implementations clearer
+ 2025-08-05 5ab9f23977 cmd/compile, runtime: add checkptr instrumentation for unsafe.Add
+ 2025-08-05 fcc036f03b cmd/compile: optimise float <-> int register moves on riscv64

Change-Id: Ie94f29d9b0cc14a52a536866f5abaef27b5c52d7
commit a4ad41708d
Cherry Mui, 2025-08-14 11:43:15 -04:00
360 changed files with 10289 additions and 7265 deletions

api/next/49097.txt (new file)

@ -0,0 +1,4 @@
pkg net, method (*Dialer) DialIP(context.Context, string, netip.Addr, netip.Addr) (*IPConn, error) #49097
pkg net, method (*Dialer) DialTCP(context.Context, string, netip.AddrPort, netip.AddrPort) (*TCPConn, error) #49097
pkg net, method (*Dialer) DialUDP(context.Context, string, netip.AddrPort, netip.AddrPort) (*UDPConn, error) #49097
pkg net, method (*Dialer) DialUnix(context.Context, string, *UnixAddr, *UnixAddr) (*UnixConn, error) #49097
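
Below, a hedged usage sketch of the new context-aware dial methods. The signatures come from the api lines above; treating a zero netip.AddrPort as "no local address" (mirroring net.DialTCP's nil laddr) is an assumption:

	package main

	import (
		"context"
		"fmt"
		"net"
		"net/netip"
		"time"
	)

	func main() {
		ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
		defer cancel()

		var d net.Dialer
		raddr := netip.MustParseAddrPort("192.0.2.1:80")
		// Zero AddrPort as laddr: assumed to mean "pick a local address".
		conn, err := d.DialTCP(ctx, "tcp", netip.AddrPort{}, raddr)
		if err != nil {
			fmt.Println("dial failed:", err)
			return
		}
		defer conn.Close()
		fmt.Println("connected to", conn.RemoteAddr())
	}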

api/next/67546.txt (new file)

@ -0,0 +1,5 @@
pkg database/sql/driver, type RowsColumnScanner interface { Close, Columns, Next, ScanColumn } #67546
pkg database/sql/driver, type RowsColumnScanner interface, Close() error #67546
pkg database/sql/driver, type RowsColumnScanner interface, Columns() []string #67546
pkg database/sql/driver, type RowsColumnScanner interface, Next([]Value) error #67546
pkg database/sql/driver, type RowsColumnScanner interface, ScanColumn(interface{}, int) error #67546
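
A minimal driver-side sketch of the new interface, assuming it lands as declared above; the fixedRows type and its string-only data layout are hypothetical, and only the method set is taken from the api lines:

	package mydriver

	import (
		"database/sql/driver"
		"fmt"
		"io"
	)

	type fixedRows struct {
		cols []string
		data [][]string // one slice of rendered column values per row
		pos  int        // number of rows already returned by Next
	}

	func (r *fixedRows) Columns() []string { return r.cols }
	func (r *fixedRows) Close() error      { return nil }

	func (r *fixedRows) Next(dest []driver.Value) error {
		if r.pos >= len(r.data) {
			return io.EOF
		}
		for i, v := range r.data[r.pos] {
			dest[i] = v
		}
		r.pos++
		return nil
	}

	// ScanColumn overrides database/sql's conversion for one column of
	// the current row, per the RowsColumnScanner contract.
	func (r *fixedRows) ScanColumn(dest interface{}, index int) error {
		s, ok := dest.(*string)
		if !ok {
			return fmt.Errorf("unsupported scan target %T", dest)
		}
		*s = r.data[r.pos-1][index]
		return nil
	}

	// Compile-time check against the new interface.
	var _ driver.RowsColumnScanner = (*fixedRows)(nil)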


@ -231,7 +231,7 @@ do exactly this.
<p>
A read of an array, struct, or complex number
may by implemented as a read of each individual sub-value
may be implemented as a read of each individual sub-value
(array element, struct field, or real/imaginary component),
in any order.
Similarly, a write of an array, struct, or complex number


@ -4,4 +4,9 @@
## Linker {#linker}
## Bootstrap {#bootstrap}
<!-- go.dev/issue/69315 -->
As mentioned in the [Go 1.24 release notes](/doc/go1.24#bootstrap), Go 1.26 now requires
Go 1.24.6 or later for bootstrap.
We expect that Go 1.28 will require a minor release of Go 1.26 or later for bootstrap.


@ -0,0 +1 @@
A database driver may implement [RowsColumnScanner] to entirely override `Scan` behavior.


@ -0,0 +1 @@
Added context aware dial functions for TCP, UDP, IP and Unix networks.


@ -1,2 +1,6 @@
## Ports {#ports}
### Windows
<!-- go.dev/issue/71671 -->
As [announced](/doc/go1.25#windows) in the Go 1.25 release notes, the [broken](/doc/go1.24#windows) 32-bit windows/arm port (`GOOS=windows` `GOARCH=arm`) is removed.


@ -23,18 +23,6 @@ func jumpLoong64(word string) bool {
return false
}
// IsLoong64MUL reports whether the op (as defined by an loong64.A* constant) is
// one of the MUL/DIV/REM instructions that require special handling.
func IsLoong64MUL(op obj.As) bool {
switch op {
case loong64.AMUL, loong64.AMULU, loong64.AMULV, loong64.AMULVU,
loong64.ADIV, loong64.ADIVU, loong64.ADIVV, loong64.ADIVVU,
loong64.AREM, loong64.AREMU, loong64.AREMV, loong64.AREMVU:
return true
}
return false
}
// IsLoong64RDTIME reports whether the op (as defined by an loong64.A*
// constant) is one of the RDTIMELW/RDTIMEHW/RDTIMED instructions that
// require special handling.


@ -974,14 +974,6 @@ func (p *Parser) getConstant(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
return addr.Offset
}
// getImmediate checks that addr represents an immediate constant and returns its value.
func (p *Parser) getImmediate(prog *obj.Prog, op obj.As, addr *obj.Addr) int64 {
if addr.Type != obj.TYPE_CONST || addr.Name != 0 || addr.Reg != 0 || addr.Index != 0 {
p.errorf("%s: expected immediate constant; found %s", op, obj.Dconv(prog, addr))
}
return addr.Offset
}
// getRegister checks that addr represents a register and returns its value.
func (p *Parser) getRegister(prog *obj.Prog, op obj.As, addr *obj.Addr) int16 {
if addr.Type != obj.TYPE_REG || addr.Offset != 0 || addr.Name != 0 || addr.Index != 0 {


@ -376,6 +376,10 @@ lable2:
FTINTRNEVF F0, F2 // 02e41a01
FTINTRNEVD F0, F2 // 02e81a01
// FSEL instruction
FSEL FCC0, F1, F2, F3 // 4304000d
FSEL FCC1, F1, F2 // 4284000d
// LDX.{B,BU,H,HU,W,WU,D} instructions
MOVB (R14)(R13), R12 // cc350038
MOVBU (R14)(R13), R12 // cc352038
@ -1095,3 +1099,8 @@ lable2:
XVBITREVH $15, X2, X1 // 417c1877
XVBITREVW $31, X2, X1 // 41fc1877
XVBITREVV $63, X2, X1 // 41fc1977
// ALSL{W/WU/D}
ALSLW $4, R4, R5, R6 // 86940500
ALSLWU $4, R4, R5, R6 // 86940700
ALSLV $4, R4, R5, R6 // 86942d00


@ -182,45 +182,6 @@ func memIdx(a *obj.Addr, v *ssa.Value) {
a.Index = i
}
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
const (
dzBlocks = 16 // number of MOV/ADD blocks
dzBlockLen = 4 // number of clears per block
dzBlockSize = 23 // size of instructions in a single block
dzMovSize = 5 // size of single MOV instruction w/ offset
dzLeaqSize = 4 // size of single LEAQ instruction
dzClearStep = 16 // number of bytes cleared by each MOV instruction
)
func duffStart(size int64) int64 {
x, _ := duff(size)
return x
}
func duffAdj(size int64) int64 {
_, x := duff(size)
return x
}
// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
if size < 32 || size > 1024 || size%dzClearStep != 0 {
panic("bad duffzero size")
}
steps := size / dzClearStep
blocks := steps / dzBlockLen
steps %= dzBlockLen
off := dzBlockSize * (dzBlocks - blocks)
var adj int64
if steps != 0 {
off -= dzLeaqSize
off -= dzMovSize * steps
adj -= dzClearStep * (dzBlockLen - steps)
}
return off, adj
}
func getgFromTLS(s *ssagen.State, r int16) {
// See the comments in cmd/internal/obj/x86/obj6.go
// near CanUse1InsnTLS for a detailed explanation of these instructions.
@ -1168,20 +1129,110 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
zero16(off + n - 16)
}
case ssa.OpAMD64DUFFCOPY:
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_ADDR
p.To.Sym = ir.Syms.Duffcopy
if v.AuxInt%16 != 0 {
v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
case ssa.OpAMD64LoweredMove:
dstReg := v.Args[0].Reg()
srcReg := v.Args[1].Reg()
if dstReg == srcReg {
break
}
tmpReg := int16(x86.REG_X14)
n := v.AuxInt
if n < 16 {
v.Fatalf("Move too small %d", n)
}
// move 16 bytes from srcReg+off to dstReg+off.
move16 := func(off int64) {
move16(s, srcReg, dstReg, tmpReg, off)
}
// Generate copying instructions.
var off int64
for n >= 16 {
move16(off)
off += 16
n -= 16
}
if n != 0 {
// use partially overlapped read/write.
// TODO: use smaller operations when we can?
move16(off + n - 16)
}
case ssa.OpAMD64LoweredMoveLoop:
dstReg := v.Args[0].Reg()
srcReg := v.Args[1].Reg()
if dstReg == srcReg {
break
}
countReg := v.RegTmp()
tmpReg := int16(x86.REG_X14)
n := v.AuxInt
loopSize := int64(64)
if n < 3*loopSize {
// - a loop count of 0 won't work.
// - a loop count of 1 is useless.
// - a loop count of 2 is a code size ~tie
// 4 instructions to implement the loop
// 4 instructions in the loop body
// vs
// 8 instructions in the straightline code
// Might as well use straightline code.
v.Fatalf("ZeroLoop size too small %d", n)
}
// move 16 bytes from srcReg+off to dstReg+off.
move16 := func(off int64) {
move16(s, srcReg, dstReg, tmpReg, off)
}
// Put iteration count in a register.
// MOVL $n, countReg
p := s.Prog(x86.AMOVL)
p.From.Type = obj.TYPE_CONST
p.From.Offset = n / loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
cntInit := p
// Copy loopSize bytes starting at srcReg to dstReg.
for i := range loopSize / 16 {
move16(i * 16)
}
// ADDQ $loopSize, srcReg
p = s.Prog(x86.AADDQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = srcReg
// ADDQ $loopSize, dstReg
p = s.Prog(x86.AADDQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = dstReg
// DECL countReg
p = s.Prog(x86.ADECL)
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
// Jump to loop header if we're not done yet.
// JNE head
p = s.Prog(x86.AJNE)
p.To.Type = obj.TYPE_BRANCH
p.To.SetTarget(cntInit.Link)
// Multiples of the loop size are now done.
n %= loopSize
// Copy any fractional portion.
var off int64
for n >= 16 {
move16(off)
off += 16
n -= 16
}
if n != 0 {
// Use partially-overlapping copy.
move16(off + n - 16)
}
p.To.Offset = 14 * (64 - v.AuxInt/16)
// 14 and 64 are magic constants. 14 is the number of bytes to encode:
// MOVUPS (SI), X0
// ADDQ $16, SI
// MOVUPS X0, (DI)
// ADDQ $16, DI
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
if v.Type.IsMemory() {
@ -2149,6 +2200,24 @@ func zero16(s *ssagen.State, reg int16, off int64) {
p.To.Offset = off
}
// move 16 bytes from src+off to dst+off using temporary register tmp.
func move16(s *ssagen.State, src, dst, tmp int16, off int64) {
// MOVUPS off(srcReg), tmpReg
// MOVUPS tmpReg, off(dstReg)
p := s.Prog(x86.AMOVUPS)
p.From.Type = obj.TYPE_MEM
p.From.Reg = src
p.From.Offset = off
p.To.Type = obj.TYPE_REG
p.To.Reg = tmp
p = s.Prog(x86.AMOVUPS)
p.From.Type = obj.TYPE_REG
p.From.Reg = tmp
p.To.Type = obj.TYPE_MEM
p.To.Reg = dst
p.To.Offset = off
}
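// Worked example (illustrative note, not part of the CL): a 24-byte
// LoweredMove emits move16(0) and then the overlapping move16(8), so
// with hypothetical registers SI=src and DI=dst the sequence is:
//	MOVUPS 0(SI), X14
//	MOVUPS X14, 0(DI)
//	MOVUPS 8(SI), X14 // bytes 8..23 overlap the first copy by 8 bytes
//	MOVUPS X14, 8(DI)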
// XXX maybe make this part of v.Reg?
// On the other hand, it is architecture-specific.
func simdReg(v *ssa.Value) int16 {


@ -1050,33 +1050,118 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Offset = int64(condCode)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64DUFFZERO:
// runtime.duffzero expects start address in R20
p := s.Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = v.AuxInt
case ssa.OpARM64LoweredZero:
// STP.P (ZR,ZR), 16(R16)
// CMP Rarg1, R16
// BLE -2(PC)
// arg1 is the address of the last 16-byte unit to zero
p := s.Prog(arm64.ASTP)
p.Scond = arm64.C_XPOST
p.From.Type = obj.TYPE_REGREG
p.From.Reg = arm64.REGZERO
p.From.Offset = int64(arm64.REGZERO)
p.To.Type = obj.TYPE_MEM
p.To.Reg = arm64.REG_R16
p.To.Offset = 16
p2 := s.Prog(arm64.ACMP)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = v.Args[1].Reg()
p2.Reg = arm64.REG_R16
p3 := s.Prog(arm64.ABLE)
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
ptrReg := v.Args[0].Reg()
n := v.AuxInt
if n < 16 {
v.Fatalf("Zero too small %d", n)
}
// Generate zeroing instructions.
var off int64
for n >= 16 {
// STP (ZR, ZR), off(ptrReg)
zero16(s, ptrReg, off, false)
off += 16
n -= 16
}
// Write any fractional portion.
// An overlapping 16-byte write can't be used here
// because STP's offsets must be a multiple of 8.
if n > 8 {
// MOVD ZR, off(ptrReg)
zero8(s, ptrReg, off)
off += 8
n -= 8
}
if n != 0 {
// MOVD ZR, off+n-8(ptrReg)
// TODO: for n<=4 we could use a smaller write.
zero8(s, ptrReg, off+n-8)
}
case ssa.OpARM64LoweredZeroLoop:
ptrReg := v.Args[0].Reg()
countReg := v.RegTmp()
n := v.AuxInt
loopSize := int64(64)
if n < 3*loopSize {
// - a loop count of 0 won't work.
// - a loop count of 1 is useless.
// - a loop count of 2 is a code size ~tie
// 3 instructions to implement the loop
// 4 instructions in the loop body
// vs
// 8 instructions in the straightline code
// Might as well use straightline code.
v.Fatalf("ZeroLoop size too small %d", n)
}
// Put iteration count in a register.
// MOVD $n, countReg
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = n / loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
cntInit := p
// Zero loopSize bytes starting at ptrReg.
// Increment ptrReg by loopSize as a side effect.
for range loopSize / 16 {
// STP.P (ZR, ZR), 16(ptrReg)
zero16(s, ptrReg, 0, true)
// TODO: should we use the postincrement form,
// or use a separate += 64 instruction?
// postincrement saves an instruction, but maybe
// it requires more integer units to do the +=16s.
}
// Decrement loop count.
// SUB $1, countReg
p = s.Prog(arm64.ASUB)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 1
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
// Jump to loop header if we're not done yet.
// CBNZ head
p = s.Prog(arm64.ACBNZ)
p.From.Type = obj.TYPE_REG
p.From.Reg = countReg
p.To.Type = obj.TYPE_BRANCH
p.To.SetTarget(cntInit.Link)
// Multiples of the loop size are now done.
n %= loopSize
// Write any fractional portion.
var off int64
for n >= 16 {
// STP (ZR, ZR), off(ptrReg)
zero16(s, ptrReg, off, false)
off += 16
n -= 16
}
if n > 8 {
// Note: an overlapping 16-byte write can't be used
// here because STP's offsets must be a multiple of 8.
// MOVD ZR, off(ptrReg)
zero8(s, ptrReg, off)
off += 8
n -= 8
}
if n != 0 {
// MOVD ZR, off+n-8(ptrReg)
// TODO: for n<=4 we could use a smaller write.
zero8(s, ptrReg, off+n-8)
}
// TODO: maybe we should use the count register to instead
// hold an end pointer and compare against that?
// ADD $n, ptrReg, endReg
// then
// CMP ptrReg, endReg
// BNE loop
// There's a past-the-end pointer here, any problem with that?
case ssa.OpARM64DUFFCOPY:
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_MEM
@ -1482,3 +1567,35 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
p.Pos = p.Pos.WithNotStmt()
return p
}
// zero16 zeroes 16 bytes at reg+off.
// If postInc is true, increment reg by 16.
func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
// STP (ZR, ZR), off(reg)
p := s.Prog(arm64.ASTP)
p.From.Type = obj.TYPE_REGREG
p.From.Reg = arm64.REGZERO
p.From.Offset = int64(arm64.REGZERO)
p.To.Type = obj.TYPE_MEM
p.To.Reg = reg
p.To.Offset = off
if postInc {
if off != 0 {
panic("can't postinc with non-zero offset")
}
// STP.P (ZR, ZR), 16(reg)
p.Scond = arm64.C_XPOST
p.To.Offset = 16
}
}
// zero8 zeroes 8 bytes at reg+off.
func zero8(s *ssagen.State, reg int16, off int64) {
// MOVD ZR, off(reg)
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Reg = reg
p.To.Offset = off
}
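
For reference, a worked example of the straightline path above (register choice illustrative): a 40-byte LoweredZero emits two 16-byte STP stores and a final 8-byte MOVD, since STP offsets must be multiples of 8:

	STP  (ZR, ZR), 0(R0)   // zero16: bytes 0..15
	STP  (ZR, ZR), 16(R0)  // zero16: bytes 16..31
	MOVD ZR, 32(R0)        // zero8: bytes 32..39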


@ -9,20 +9,14 @@ package importer
import (
"cmd/compile/internal/base"
"cmd/compile/internal/types2"
"fmt"
"go/token"
"internal/pkgbits"
"sync"
)
func assert(p bool) {
base.Assert(p)
}
func errorf(format string, args ...interface{}) {
panic(fmt.Sprintf(format, args...))
}
const deltaNewFile = -64 // see cmd/compile/internal/gc/bexport.go
// Synthesize a token.Pos
@ -31,108 +25,6 @@ type fakeFileSet struct {
files map[string]*token.File
}
func (s *fakeFileSet) pos(file string, line, column int) token.Pos {
// TODO(mdempsky): Make use of column.
// Since we don't know the set of needed file positions, we
// reserve maxlines positions per file.
const maxlines = 64 * 1024
f := s.files[file]
if f == nil {
f = s.fset.AddFile(file, -1, maxlines)
s.files[file] = f
// Allocate the fake linebreak indices on first use.
// TODO(adonovan): opt: save ~512KB using a more complex scheme?
fakeLinesOnce.Do(func() {
fakeLines = make([]int, maxlines)
for i := range fakeLines {
fakeLines[i] = i
}
})
f.SetLines(fakeLines)
}
if line > maxlines {
line = 1
}
// Treat the file as if it contained only newlines
// and column=1: use the line number as the offset.
return f.Pos(line - 1)
}
var (
fakeLines []int
fakeLinesOnce sync.Once
)
func chanDir(d int) types2.ChanDir {
// tag values must match the constants in cmd/compile/internal/gc/go.go
switch d {
case 1 /* Crecv */ :
return types2.RecvOnly
case 2 /* Csend */ :
return types2.SendOnly
case 3 /* Cboth */ :
return types2.SendRecv
default:
errorf("unexpected channel dir %d", d)
return 0
}
}
var predeclared = []types2.Type{
// basic types
types2.Typ[types2.Bool],
types2.Typ[types2.Int],
types2.Typ[types2.Int8],
types2.Typ[types2.Int16],
types2.Typ[types2.Int32],
types2.Typ[types2.Int64],
types2.Typ[types2.Uint],
types2.Typ[types2.Uint8],
types2.Typ[types2.Uint16],
types2.Typ[types2.Uint32],
types2.Typ[types2.Uint64],
types2.Typ[types2.Uintptr],
types2.Typ[types2.Float32],
types2.Typ[types2.Float64],
types2.Typ[types2.Complex64],
types2.Typ[types2.Complex128],
types2.Typ[types2.String],
// basic type aliases
types2.Universe.Lookup("byte").Type(),
types2.Universe.Lookup("rune").Type(),
// error
types2.Universe.Lookup("error").Type(),
// untyped types
types2.Typ[types2.UntypedBool],
types2.Typ[types2.UntypedInt],
types2.Typ[types2.UntypedRune],
types2.Typ[types2.UntypedFloat],
types2.Typ[types2.UntypedComplex],
types2.Typ[types2.UntypedString],
types2.Typ[types2.UntypedNil],
// package unsafe
types2.Typ[types2.UnsafePointer],
// invalid type
types2.Typ[types2.Invalid], // only appears in packages with errors
// used internally by gc; never used by this package or in .a files
// not to be confused with the universe any
anyType{},
// comparable
types2.Universe.Lookup("comparable").Type(),
// "any" has special handling: see usage of predeclared.
}
type anyType struct{}
func (t anyType) Underlying() types2.Type { return t }


@ -1241,17 +1241,6 @@ func pruneUnusedAutos(ll []*ir.Name, vis *hairyVisitor) []*ir.Name {
return s
}
// numNonClosures returns the number of functions in list which are not closures.
func numNonClosures(list []*ir.Func) int {
count := 0
for _, fn := range list {
if fn.OClosure == nil {
count++
}
}
return count
}
func doList(list []ir.Node, do func(ir.Node) bool) bool {
for _, x := range list {
if x != nil {


@ -399,14 +399,6 @@ func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int {
return score
}
// LargestPositiveScoreAdjustment tries to estimate the largest possible
// positive score adjustment that could be applied to a given callsite.
// At the moment we don't have very many positive score adjustments, so
// this is just hard-coded, not table-driven.
func LargestPositiveScoreAdjustment(fn *ir.Func) int {
return adjValues[panicPathAdj] + adjValues[initFuncAdj]
}
// callSiteTab contains entries for each call in the function
// currently being processed by InlineCalls; this variable will either
// be set to 'cstabCache' below (for non-inlinable routines) or to the


@ -32,12 +32,3 @@ func DeepCopy(pos src.XPos, n Node) Node {
}
return edit(n)
}
// DeepCopyList returns a list of deep copies (using DeepCopy) of the nodes in list.
func DeepCopyList(pos src.XPos, list []Node) []Node {
var out []Node
for _, n := range list {
out = append(out, DeepCopy(pos, n))
}
return out
}


@ -34,15 +34,6 @@ type miniNode struct {
esc uint16
}
// posOr returns pos if known, or else n.pos.
// For use in DeepCopy.
func (n *miniNode) posOr(pos src.XPos) src.XPos {
if pos.IsKnown() {
return pos
}
return n.pos
}
// op can be read, but not written.
// An embedding implementation can provide a SetOp if desired.
// (The panicking SetOp is with the other panics below.)


@ -155,19 +155,6 @@ func Any(n Node, cond func(Node) bool) bool {
return do(n)
}
// AnyList calls Any(x, cond) for each node x in the list, in order.
// If any call returns true, AnyList stops and returns true.
// Otherwise, AnyList returns false after calling Any(x, cond)
// for every x in the list.
func AnyList(list Nodes, cond func(Node) bool) bool {
for _, x := range list {
if Any(x, cond) {
return true
}
}
return false
}
// EditChildren edits the child nodes of n, replacing each child x with edit(x).
//
// Note that EditChildren(n, edit) only calls edit(x) for n's immediate children.


@ -1065,6 +1065,17 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
{Type: obj.TYPE_CONST, Offset: int64((v.AuxInt >> 0) & 0x1f)},
})
case ssa.OpLOONG64ADDshiftLLV:
// ADDshiftLLV Rarg0, Rarg1, $shift
// ALSLV $shift, Rarg1, Rarg0, Rtmp
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.Reg = v.Args[1].Reg()
p.AddRestSourceReg(v.Args[0].Reg())
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpClobber, ssa.OpClobberReg:
// TODO: implement for clobberdead experiment. Nop is ok for now.
default:
@ -1075,8 +1086,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
var blockJump = map[ssa.BlockKind]struct {
asm, invasm obj.As
}{
ssa.BlockLOONG64EQ: {loong64.ABEQ, loong64.ABNE},
ssa.BlockLOONG64NE: {loong64.ABNE, loong64.ABEQ},
ssa.BlockLOONG64EQZ: {loong64.ABEQ, loong64.ABNE},
ssa.BlockLOONG64NEZ: {loong64.ABNE, loong64.ABEQ},
ssa.BlockLOONG64LTZ: {loong64.ABLTZ, loong64.ABGEZ},
ssa.BlockLOONG64GEZ: {loong64.ABGEZ, loong64.ABLTZ},
ssa.BlockLOONG64LEZ: {loong64.ABLEZ, loong64.ABGTZ},
@ -1102,7 +1113,7 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
case ssa.BlockExit, ssa.BlockRetJmp:
case ssa.BlockRet:
s.Prog(obj.ARET)
case ssa.BlockLOONG64EQ, ssa.BlockLOONG64NE,
case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
ssa.BlockLOONG64BEQ, ssa.BlockLOONG64BNE,
@ -1132,7 +1143,7 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
p.From.Type = obj.TYPE_REG
p.From.Reg = b.Controls[0].Reg()
p.Reg = b.Controls[1].Reg()
case ssa.BlockLOONG64EQ, ssa.BlockLOONG64NE,
case ssa.BlockLOONG64EQZ, ssa.BlockLOONG64NEZ,
ssa.BlockLOONG64LTZ, ssa.BlockLOONG64GEZ,
ssa.BlockLOONG64LEZ, ssa.BlockLOONG64GTZ,
ssa.BlockLOONG64FPT, ssa.BlockLOONG64FPF:


@ -23,7 +23,6 @@ type poser interface{ Pos() syntax.Pos }
type ender interface{ End() syntax.Pos }
func (m *posMap) pos(p poser) src.XPos { return m.makeXPos(p.Pos()) }
func (m *posMap) end(p ender) src.XPos { return m.makeXPos(p.End()) }
func (m *posMap) makeXPos(pos syntax.Pos) src.XPos {
// Predeclared objects (e.g., the result parameter for error.Error)


@ -3681,17 +3681,6 @@ func expandInline(fn *ir.Func, pri pkgReaderIndex) {
typecheck.Target.Funcs = typecheck.Target.Funcs[:topdcls]
}
// usedLocals returns a set of local variables that are used within body.
func usedLocals(body []ir.Node) ir.NameSet {
var used ir.NameSet
ir.VisitList(body, func(n ir.Node) {
if n, ok := n.(*ir.Name); ok && n.Op() == ir.ONAME && n.Class == ir.PAUTO {
used.Add(n)
}
})
return used
}
// @@@ Method wrappers
//
// Here we handle constructing "method wrappers," alternative entry


@ -7,7 +7,6 @@ package noder
import (
"cmp"
"fmt"
"internal/buildcfg"
"internal/pkgbits"
"internal/types/errors"
"io"
@ -464,11 +463,8 @@ func readPackage(pr *pkgReader, importpkg *types.Pkg, localStub bool) {
// writeUnifiedExport writes to `out` the finalized, self-contained
// Unified IR export data file for the current compilation unit.
func writeUnifiedExport(out io.Writer) {
// Use V2 as the encoded version aliastypeparams GOEXPERIMENT is enabled.
version := pkgbits.V1
if buildcfg.Experiment.AliasTypeParams {
version = pkgbits.V2
}
// Use V2 as the encoded version for aliastypeparams.
version := pkgbits.V2
l := linker{
pw: pkgbits.NewPkgEncoder(version, base.Debug.SyncFrames),


@ -96,11 +96,8 @@ type pkgWriter struct {
// newPkgWriter returns an initialized pkgWriter for the specified
// package.
func newPkgWriter(m posMap, pkg *types2.Package, info *types2.Info, otherInfo map[*syntax.FuncLit]bool) *pkgWriter {
// Use V2 as the encoded version aliastypeparams GOEXPERIMENT is enabled.
version := pkgbits.V1
if buildcfg.Experiment.AliasTypeParams {
version = pkgbits.V2
}
// Use V2 as the encoded version for aliastypeparams.
version := pkgbits.V2
return &pkgWriter{
PkgEncoder: pkgbits.NewPkgEncoder(version, base.Debug.SyncFrames),
@ -2413,11 +2410,6 @@ func (p posVar) String() string {
return p.pos.String() + ":" + p.var_.String()
}
func (w *writer) exprList(expr syntax.Expr) {
w.Sync(pkgbits.SyncExprList)
w.exprs(syntax.UnpackListExpr(expr))
}
func (w *writer) exprs(exprs []syntax.Expr) {
w.Sync(pkgbits.SyncExprs)
w.Len(len(exprs))


@ -14,6 +14,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/ppc64"
"internal/abi"
"internal/buildcfg"
"math"
"strings"
@ -1913,12 +1914,90 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpPPC64LoweredPanicBoundsA, ssa.OpPPC64LoweredPanicBoundsB, ssa.OpPPC64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpPPC64LoweredPanicBoundsRR, ssa.OpPPC64LoweredPanicBoundsRC, ssa.OpPPC64LoweredPanicBoundsCR, ssa.OpPPC64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpPPC64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - ppc64.REG_R3)
yIsReg = true
yVal = int(v.Args[1].Reg() - ppc64.REG_R3)
case ssa.OpPPC64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - ppc64.REG_R3)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_R3 + int16(yVal)
}
case ssa.OpPPC64LoweredPanicBoundsCR:
yIsReg = true
yVal := int(v.Args[0].Reg() - ppc64.REG_R3)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
if xVal == yVal {
xVal = 1
}
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_R3 + int16(xVal)
}
case ssa.OpPPC64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_R3 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_R3 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpPPC64LoweredNilCheck:
if buildcfg.GOOS == "aix" {


@ -1468,10 +1468,3 @@ func MarkUsedIfaceMethod(n *ir.CallExpr) {
Add: InterfaceMethodOffset(ityp, midx),
})
}
func deref(t *types.Type) *types.Type {
if t.IsPtr() {
return t.Elem()
}
return t
}


@ -417,9 +417,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpRISCV64FSQRTS, ssa.OpRISCV64FNEGS, ssa.OpRISCV64FABSD, ssa.OpRISCV64FSQRTD, ssa.OpRISCV64FNEGD,
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVDX,
ssa.OpRISCV64FMVSX, ssa.OpRISCV64FMVXS, ssa.OpRISCV64FMVDX, ssa.OpRISCV64FMVXD,
ssa.OpRISCV64FCVTSW, ssa.OpRISCV64FCVTSL, ssa.OpRISCV64FCVTWS, ssa.OpRISCV64FCVTLS,
ssa.OpRISCV64FCVTDW, ssa.OpRISCV64FCVTDL, ssa.OpRISCV64FCVTWD, ssa.OpRISCV64FCVTLD, ssa.OpRISCV64FCVTDS, ssa.OpRISCV64FCVTSD,
ssa.OpRISCV64FCLASSS, ssa.OpRISCV64FCLASSD,
ssa.OpRISCV64NOT, ssa.OpRISCV64NEG, ssa.OpRISCV64NEGW, ssa.OpRISCV64CLZ, ssa.OpRISCV64CLZW, ssa.OpRISCV64CTZ, ssa.OpRISCV64CTZW,
ssa.OpRISCV64REV8, ssa.OpRISCV64CPOP, ssa.OpRISCV64CPOPW:
p := s.Prog(v.Op.Asm())


@ -264,24 +264,6 @@
(Move [8] dst src mem) => (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) => (MOVOstore dst (MOVOload src mem) mem)
(Move [32] dst src mem) =>
(Move [16]
(OffPtr <dst.Type> dst [16])
(OffPtr <src.Type> src [16])
(Move [16] dst src mem))
(Move [48] dst src mem) =>
(Move [32]
(OffPtr <dst.Type> dst [16])
(OffPtr <src.Type> src [16])
(Move [16] dst src mem))
(Move [64] dst src mem) =>
(Move [32]
(OffPtr <dst.Type> dst [32])
(OffPtr <src.Type> src [32])
(Move [32] dst src mem))
(Move [3] dst src mem) =>
(MOVBstore [2] dst (MOVBload [2] src mem)
(MOVWstore dst (MOVWload src mem) mem))
@ -310,28 +292,19 @@
(MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem)
(MOVQstore dst (MOVQload src mem) mem))
// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 <= 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(MOVOstore dst (MOVOload src mem) mem))
// Copying up to 192 bytes uses straightline code.
(Move [s] dst src mem) && s > 16 && s < 192 && logLargeCopy(v, s) => (LoweredMove [s] dst src mem)
// Medium copying uses a duff device.
(Move [s] dst src mem)
&& s > 64 && s <= 16*64 && s%16 == 0
&& logLargeCopy(v, s) =>
(DUFFCOPY [s] dst src mem)
// Copying up to ~1KB uses a small loop.
(Move [s] dst src mem) && s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s) => (LoweredMoveLoop [s] dst src mem)
// Large copying uses REP MOVSQ.
(Move [s] dst src mem) && s > 16*64 && s%8 == 0 && logLargeCopy(v, s) =>
(Move [s] dst src mem) && s > repMoveThreshold && s%8 != 0 =>
(Move [s-s%8]
(OffPtr <dst.Type> dst [s%8])
(OffPtr <src.Type> src [s%8])
(MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s) =>
(REPMOVSQ dst src (MOVQconst [s/8]) mem)
// Lowering Zero instructions
@ -606,31 +579,31 @@
// mutandis, for UGE and SETAE, and CC and SETCC.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> ((ULT|UGE) (BTLconst [int8(log32(c))] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
=> ((ULT|UGE) (BTQconst [int8(log32(c))] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
=> ((ULT|UGE) (BTQconst [int8(log64(c))] x))
((NE|EQ) (TESTLconst [c] x)) && isUnsignedPowerOfTwo(uint32(c))
=> ((ULT|UGE) (BTLconst [int8(log32u(uint32(c)))] x))
((NE|EQ) (TESTQconst [c] x)) && isUnsignedPowerOfTwo(uint64(c))
=> ((ULT|UGE) (BTQconst [int8(log32u(uint32(c)))] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUnsignedPowerOfTwo(uint64(c))
=> ((ULT|UGE) (BTQconst [int8(log64u(uint64(c)))] x))
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE) (BTLconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
=> (SET(B|AE) (BTQconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
=> (SET(B|AE) (BTQconst [int8(log64(c))] x))
(SET(NE|EQ) (TESTLconst [c] x)) && isUnsignedPowerOfTwo(uint32(c))
=> (SET(B|AE) (BTLconst [int8(log32u(uint32(c)))] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUnsignedPowerOfTwo(uint64(c))
=> (SET(B|AE) (BTQconst [int8(log32u(uint32(c)))] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUnsignedPowerOfTwo(uint64(c))
=> (SET(B|AE) (BTQconst [int8(log64u(uint64(c)))] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c)
=> (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUnsignedPowerOfTwo(uint32(c))
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUnsignedPowerOfTwo(uint64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUnsignedPowerOfTwo(uint64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem)
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
@ -655,14 +628,14 @@
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
// Note: only convert OR/XOR to BTS/BTC if the constant wouldn't fit in
// the constant field of the OR/XOR instruction. See issue 61694.
((OR|XOR)Q (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64(c))] x)
((OR|XOR)Q (MOVQconst [c]) x) && isUnsignedPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31 => (BT(S|C)Qconst [int8(log64u(uint64(c)))] x)
// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
// Note: only convert AND to BTR if the constant wouldn't fit in
// the constant field of the AND instruction. See issue 61694.
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUnsignedPowerOfTwo(uint64(^c)) && uint64(^c) >= 1<<31 => (BTRQconst [int8(log64u(uint64(^c)))] x)
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,


@ -1014,20 +1014,38 @@ func init() {
// arg0 = destination pointer
// arg1 = source pointer
// arg2 = mem
// auxint = # of bytes to copy, must be multiple of 16
// auxint = # of bytes to copy
// returns memory
{
name: "DUFFCOPY",
name: "LoweredMove",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{buildReg("DI"), buildReg("SI")},
clobbers: buildReg("DI SI X0"), // uses X0 as a temporary
inputs: []regMask{gp, gp},
clobbers: buildReg("X14"), // uses X14 as a temporary
},
clobberFlags: true,
//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
//faultOnNilArg1: true,
unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts
faultOnNilArg0: true,
faultOnNilArg1: true,
},
// arg0 = destination pointer
// arg1 = source pointer
// arg2 = mem
// auxint = # of bytes to copy
// returns memory
{
name: "LoweredMoveLoop",
aux: "Int64",
argLength: 3,
reg: regInfo{
inputs: []regMask{gp, gp},
clobbers: buildReg("X14"), // uses X14 as a temporary
clobbersArg0: true,
clobbersArg1: true,
},
clobberFlags: true,
faultOnNilArg0: true,
faultOnNilArg1: true,
needIntTemp: true,
},
// arg0 = destination pointer


@ -392,44 +392,8 @@
(Zero [16] ptr mem) =>
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)
(Zero [32] ptr mem) =>
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
(Zero [48] ptr mem) =>
(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
(Zero [64] ptr mem) =>
(STP [48] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [32] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [16] ptr (MOVDconst [0]) (MOVDconst [0])
(STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
// strip off fractional word zeroing
(Zero [s] ptr mem) && s%16 != 0 && s%16 <= 8 && s > 16 =>
(Zero [8]
(OffPtr <ptr.Type> ptr [s-8])
(Zero [s-s%16] ptr mem))
(Zero [s] ptr mem) && s%16 != 0 && s%16 > 8 && s > 16 =>
(Zero [16]
(OffPtr <ptr.Type> ptr [s-16])
(Zero [s-s%16] ptr mem))
// medium zeroing uses a duff device
// 4, 16, and 64 are magic constants, see runtime/mkduff.go
(Zero [s] ptr mem)
&& s%16 == 0 && s > 64 && s <= 16*64 =>
(DUFFZERO [4 * (64 - s/16)] ptr mem)
// large zeroing uses a loop
(Zero [s] ptr mem)
&& s%16 == 0 && s > 16*64 =>
(LoweredZero
ptr
(ADDconst <ptr.Type> [s-16] ptr)
mem)
(Zero [s] ptr mem) && s > 16 && s < 192 => (LoweredZero [s] ptr mem)
(Zero [s] ptr mem) && s >= 192 => (LoweredZeroLoop [s] ptr mem)
// moves
(Move [0] _ _ mem) => mem


@ -536,44 +536,36 @@ func init() {
{name: "LessThanNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x<y but without honoring overflow, false otherwise.
{name: "GreaterEqualNoov", argLength: 1, reg: readflags}, // bool, true flags encode signed x>=y but without honoring overflow, false otherwise.
// duffzero
// medium zeroing
// arg0 = address of memory to zero
// arg1 = mem
// auxint = offset into duffzero code to start executing
// auxint = # of bytes to zero
// returns mem
// R20 changed as side effect
// R16 and R17 may be clobbered by linker trampoline.
{
name: "DUFFZERO",
name: "LoweredZero",
aux: "Int64",
argLength: 2,
reg: regInfo{
inputs: []regMask{buildReg("R20")},
clobbers: buildReg("R16 R17 R20 R30"),
inputs: []regMask{gp},
},
//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
unsafePoint: true, // FP maintenance around DUFFZERO can be clobbered by interrupts
faultOnNilArg0: true,
},
// large zeroing
// arg0 = address of memory to zero (in R16 aka arm64.REGRT1, changed as side effect)
// arg1 = address of the last 16-byte unit to zero
// arg2 = mem
// arg0 = address of memory to zero
// arg1 = mem
// auxint = # of bytes to zero
// returns mem
// STP.P (ZR,ZR), 16(R16)
// CMP Rarg1, R16
// BLE -2(PC)
// Note: the-end-of-the-memory may be not a valid pointer. it's a problem if it is spilled.
// the-end-of-the-memory - 16 is with the area to zero, ok to spill.
{
name: "LoweredZero",
argLength: 3,
name: "LoweredZeroLoop",
aux: "Int64",
argLength: 2,
reg: regInfo{
inputs: []regMask{buildReg("R16"), gp},
clobbers: buildReg("R16"),
inputs: []regMask{gp},
clobbersArg0: true,
},
clobberFlags: true,
faultOnNilArg0: true,
needIntTemp: true,
},
// duffcopy


@ -517,7 +517,7 @@
(GetCallerSP ...) => (LoweredGetCallerSP ...)
(GetCallerPC ...) => (LoweredGetCallerPC ...)
(If cond yes no) => (NE (MOVBUreg <typ.UInt64> cond) yes no)
(If cond yes no) => (NEZ (MOVBUreg <typ.UInt64> cond) yes no)
(MOVBUreg x:((SGT|SGTU) _ _)) => x
(MOVBUreg x:(XOR (MOVVconst [1]) ((SGT|SGTU) _ _))) => x
@ -755,6 +755,9 @@
(MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
(MULV (NEGV x) (MOVVconst [c])) => (MULV x (MOVVconst [-c]))
(MULV (NEGV x) (NEGV y)) => (MULV x y)
// div by constant
(DIVVU x (MOVVconst [1])) => x
(DIVVU x (MOVVconst [c])) && isPowerOfTwo(c) => (SRLVconst [log64(c)] x)
@ -899,41 +902,46 @@
// Optimizations
// Absorb boolean tests into block
(NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no)
(NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no)
(EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no)
(EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no)
(NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no)
(NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no)
(NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no)
(NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no)
(EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no)
(EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no)
(EQ (XORconst [1] cmp:(SGTconst _)) yes no) => (NE cmp yes no)
(EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no)
(NE (SGTUconst [1] x) yes no) => (EQ x yes no)
(EQ (SGTUconst [1] x) yes no) => (NE x yes no)
(NE (SGTU x (MOVVconst [0])) yes no) => (NE x yes no)
(EQ (SGTU x (MOVVconst [0])) yes no) => (EQ x yes no)
(NE (SGTconst [0] x) yes no) => (LTZ x yes no)
(EQ (SGTconst [0] x) yes no) => (GEZ x yes no)
(NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no)
(EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no)
(NEZ (FPFlagTrue cmp) yes no) => (FPT cmp yes no)
(NEZ (FPFlagFalse cmp) yes no) => (FPF cmp yes no)
(EQZ (FPFlagTrue cmp) yes no) => (FPF cmp yes no)
(EQZ (FPFlagFalse cmp) yes no) => (FPT cmp yes no)
(NEZ (XORconst [1] cmp:(SGT _ _)) yes no) => (EQZ cmp yes no)
(NEZ (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQZ cmp yes no)
(NEZ (XORconst [1] cmp:(SGTconst _)) yes no) => (EQZ cmp yes no)
(NEZ (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQZ cmp yes no)
(EQZ (XORconst [1] cmp:(SGT _ _)) yes no) => (NEZ cmp yes no)
(EQZ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NEZ cmp yes no)
(EQZ (XORconst [1] cmp:(SGTconst _)) yes no) => (NEZ cmp yes no)
(EQZ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NEZ cmp yes no)
(NEZ (SGTUconst [1] x) yes no) => (EQZ x yes no)
(EQZ (SGTUconst [1] x) yes no) => (NEZ x yes no)
(NEZ (SGTU x (MOVVconst [0])) yes no) => (NEZ x yes no)
(EQZ (SGTU x (MOVVconst [0])) yes no) => (EQZ x yes no)
(NEZ (SGTconst [0] x) yes no) => (LTZ x yes no)
(EQZ (SGTconst [0] x) yes no) => (GEZ x yes no)
(NEZ (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no)
(EQZ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no)
(EQ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (EQ (SGTUconst [c] y) yes no)
(NE (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (NE (SGTUconst [c] y) yes no)
(EQ (SUBV x y) yes no) => (BEQ x y yes no)
(NE (SUBV x y) yes no) => (BNE x y yes no)
(EQ (SGT x y) yes no) => (BGE y x yes no)
(NE (SGT x y) yes no) => (BLT y x yes no)
(EQ (SGTU x y) yes no) => (BGEU y x yes no)
(NE (SGTU x y) yes no) => (BLTU y x yes no)
// Convert EQZ/NEZ into more optimal branch conditions.
(EQZ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (EQZ (SGTUconst [c] y) yes no)
(NEZ (SGTU (MOVVconst [c]) y) yes no) && c >= -2048 && c <= 2047 => (NEZ (SGTUconst [c] y) yes no)
(EQZ (SUBV x y) yes no) => (BEQ x y yes no)
(NEZ (SUBV x y) yes no) => (BNE x y yes no)
(EQZ (SGT x y) yes no) => (BGE y x yes no)
(NEZ (SGT x y) yes no) => (BLT y x yes no)
(EQZ (SGTU x y) yes no) => (BGEU y x yes no)
(NEZ (SGTU x y) yes no) => (BLTU y x yes no)
(EQZ (SGTconst [c] y) yes no) => (BGE y (MOVVconst [c]) yes no)
(NEZ (SGTconst [c] y) yes no) => (BLT y (MOVVconst [c]) yes no)
(EQZ (SGTUconst [c] y) yes no) => (BGEU y (MOVVconst [c]) yes no)
(NEZ (SGTUconst [c] y) yes no) => (BLTU y (MOVVconst [c]) yes no)
// absorb constants into branches
(EQ (MOVVconst [0]) yes no) => (First yes no)
(EQ (MOVVconst [c]) yes no) && c != 0 => (First no yes)
(NE (MOVVconst [0]) yes no) => (First no yes)
(NE (MOVVconst [c]) yes no) && c != 0 => (First yes no)
(EQZ (MOVVconst [0]) yes no) => (First yes no)
(EQZ (MOVVconst [c]) yes no) && c != 0 => (First no yes)
(NEZ (MOVVconst [0]) yes no) => (First no yes)
(NEZ (MOVVconst [c]) yes no) && c != 0 => (First yes no)
(LTZ (MOVVconst [c]) yes no) && c < 0 => (First yes no)
(LTZ (MOVVconst [c]) yes no) && c >= 0 => (First no yes)
(LEZ (MOVVconst [c]) yes no) && c <= 0 => (First yes no)
@ -943,6 +951,22 @@
(GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
(GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes)
// absorb NEGV into branches
(EQZ (NEGV x) yes no) => (EQZ x yes no)
(NEZ (NEGV x) yes no) => (NEZ x yes no)
// Convert branch with zero to more optimal branch zero.
(BEQ (MOVVconst [0]) cond yes no) => (EQZ cond yes no)
(BEQ cond (MOVVconst [0]) yes no) => (EQZ cond yes no)
(BNE (MOVVconst [0]) cond yes no) => (NEZ cond yes no)
(BNE cond (MOVVconst [0]) yes no) => (NEZ cond yes no)
(BLT (MOVVconst [0]) cond yes no) => (GTZ cond yes no)
(BLT cond (MOVVconst [0]) yes no) => (LTZ cond yes no)
(BLTU (MOVVconst [0]) cond yes no) => (NEZ cond yes no)
(BGE (MOVVconst [0]) cond yes no) => (LEZ cond yes no)
(BGE cond (MOVVconst [0]) yes no) => (GEZ cond yes no)
(BGEU (MOVVconst [0]) cond yes no) => (EQZ cond yes no)
// Arch-specific inlining for small or disjoint runtime.memmove
// Match post-lowering calls, register version.
(SelectN [0] call:(CALLstatic {sym} dst src (MOVVconst [sz]) mem))


@ -577,11 +577,13 @@ func init() {
// is $hint and bit[41:5] is $n.
{name: "PRELD", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELD", hasSideEffects: true},
{name: "PRELDX", argLength: 2, aux: "Int64", reg: preldreg, asm: "PRELDX", hasSideEffects: true},
{name: "ADDshiftLLV", argLength: 2, aux: "Int64", reg: gp21, asm: "ALSLV"}, // arg0 + arg1<<auxInt, the value of auxInt should be in the range [1, 4].
}
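// Illustrative note (assumption, not part of the CL): since ADDshiftLLV
// computes arg0 + arg1<<auxInt with auxInt in [1,4], a multiply such as
// x*9 can be strength-reduced to (ADDshiftLLV [3] x x), i.e. x + x<<3,
// emitted as a single ALSLV instruction.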
blocks := []blockData{
{name: "EQ", controls: 1},
{name: "NE", controls: 1},
{name: "EQZ", controls: 1}, // = 0
{name: "NEZ", controls: 1}, // != 0
{name: "LTZ", controls: 1}, // < 0
{name: "LEZ", controls: 1}, // <= 0
{name: "GTZ", controls: 1}, // > 0
@ -589,7 +591,7 @@ func init() {
{name: "FPT", controls: 1}, // FP flag is true
{name: "FPF", controls: 1}, // FP flag is false
{name: "BEQ", controls: 2}, // controls[0] == controls[1]
{name: "BNE", controls: 2}, // controls[0] == controls[1]
{name: "BNE", controls: 2}, // controls[0] != controls[1]
{name: "BGE", controls: 2}, // controls[0] >= controls[1]
{name: "BLT", controls: 2}, // controls[0] < controls[1]
{name: "BGEU", controls: 2}, // controls[0] >= controls[1], unsigned


@ -4,3 +4,6 @@
// Prefer addition when shifting left by one.
(SLLVconst [1] x) => (ADDV x x)
(EQZ (XOR x y) yes no) => (BEQ x y yes no)
(NEZ (XOR x y) yes no) => (BNE x y yes no)


@ -553,9 +553,11 @@
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// Optimizations
// Note that PPC "logical" immediates come in 0:15 and 16:31 unsigned immediate forms,


@ -171,10 +171,7 @@ func init() {
fpstore = regInfo{inputs: []regMask{gp | sp | sb, fp}}
fpstoreidx = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, fp}}
callerSave = regMask(gp | fp | gr | xer)
r3 = buildReg("R3")
r4 = buildReg("R4")
r5 = buildReg("R5")
r6 = buildReg("R6")
first7 = buildReg("R3 R4 R5 R6 R7 R8 R9")
)
ops := []opData{
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
@ -706,12 +703,16 @@ func init() {
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ buildReg("R0 R3 R4 R5 R6 R7 R8 R9 R10 R14 R15 R16 R17 R20 R21 g")) | buildReg("R31"), outputs: []regMask{buildReg("R29")}}, clobberFlags: true, aux: "Int64"},
{name: "LoweredPubBarrier", argLength: 1, asm: "LWSYNC", hasSideEffects: true}, // Do data barrier. arg0=memory
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r6}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r5}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// the RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first7, first7}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first7}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// (InvertFlags (CMP a b)) == (CMP b a)
// So if we want (LessThan (CMP a b)) but we can't do that because a is a constant,

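A sketch of the folding chain these rules and ops implement (the function is illustrative; the register and aux details are read off the rules above): a constant index never needs to occupy one of the R3-R9 argument registers.

    package main

    //go:noinline
    func fifth(s []int) int {
        // On a bounds failure, PanicBounds lowers to LoweredPanicBoundsRR
        // with the index and length in registers. Here the index is the
        // constant 5, so the MOVDconst argument is folded away and the
        // call becomes LoweredPanicBoundsCR with 5 in its PanicBoundsC aux.
        return s[5]
    }

    func main() {
        defer func() { println(recover() != nil) }() // true
        _ = fifth(make([]int, 3))
    }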

@ -299,6 +299,11 @@
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOV(B|BU|H|HU|W|WU|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOV(W|D)load [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOV(W|D)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(B|H|W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
@ -309,15 +314,26 @@
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(MOV(B|H|W|D)storezero [off1+off2] {mergeSym(sym1,sym2)} base mem)
(FMOV(W|D)store [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) &&
is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) &&
(base.Op != OpSB || !config.ctxt.Flag_dynlink) =>
(FMOV(W|D)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(B|BU|H|HU|W|WU|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|BU|H|HU|W|WU|D)load [off1+int32(off2)] {sym} base mem)
(FMOV(W|D)load [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(FMOV(W|D)load [off1+int32(off2)] {sym} base mem)
(MOV(B|H|W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|H|W|D)store [off1+int32(off2)] {sym} base val mem)
(MOV(B|H|W|D)storezero [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(MOV(B|H|W|D)storezero [off1+int32(off2)] {sym} base mem)
(FMOV(W|D)store [off1] {sym} (ADDI [off2] base) val mem) && is32Bit(int64(off1)+off2) =>
(FMOV(W|D)store [off1+int32(off2)] {sym} base val mem)
// Similarly, fold ADDI into MOVaddr to avoid confusing live variable analysis
// with OffPtr -> ADDI.
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
@ -701,6 +717,13 @@
(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
// Replace load from same location as preceding store with copy.
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXD x)
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVDX x)
(MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVXS x)
(MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (MOVWUreg (FMVXS x))
(FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) => (FMVSX x)
// If a register move has only 1 use, just use the same register without emitting an instruction.
// MOVnop does not emit an instruction; it only ensures the type.
(MOVDreg x) && x.Uses == 1 => (MOVDnop x)
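
These load-after-store rules are exactly what bit-reinterpretation idioms reduce to; a minimal sketch (ordinary Go, not compiler code, assuming the argument gets materialized on the stack):

    package main

    import "math"

    // math.Float64bits stores f and reloads the same word as an
    // integer; the (MOVDload ... (FMOVDstore ...)) rule above replaces
    // the memory round trip with a single FMVXD register move.
    //go:noinline
    func bits(f float64) uint64 {
        return math.Float64bits(f)
    }

    func main() {
        println(bits(1.0) == 0x3ff0000000000000) // true
    }
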
@ -839,6 +862,18 @@
(F(MADD|NMADD|MSUB|NMSUB)D neg:(FNEGD x) y z) && neg.Uses == 1 => (F(NMSUB|MSUB|NMADD|MADD)D x y z)
(F(MADD|NMADD|MSUB|NMSUB)D x y neg:(FNEGD z)) && neg.Uses == 1 => (F(MSUB|NMSUB|MADD|NMADD)D x y z)
// Test for -∞ (bit 0) using 64 bit classify instruction.
(FLTD x (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))]))) => (ANDI [1] (FCLASSD x))
(FLED (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])) x) => (SNEZ (ANDI <typ.Int64> [0xff &^ 1] (FCLASSD x)))
(FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))]))) => (ANDI [1] (FCLASSD x))
(FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))]))) => (SEQZ (ANDI <typ.Int64> [1] (FCLASSD x)))
// Test for +∞ (bit 7) using 64 bit classify instruction.
(FLTD (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])) x) => (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
(FLED x (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))]))) => (SNEZ (ANDI <typ.Int64> [0xff &^ (1<<7)] (FCLASSD x)))
(FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))]))) => (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
(FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))]))) => (SEQZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
//
// Optimisations for rva22u64 and above.
//

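Go's math.IsInf is written as a comparison against ±MaxFloat64, which is the exact shape the FLTD/FLED rules above match; a sketch:

    package main

    import "math"

    // With the classify rules above, the f < -MaxFloat64 comparison
    // inside IsInf lowers on riscv64 to FCLASSD plus a test of bit 0
    // (-Inf), avoiding a float constant load and an FLT.D compare.
    //go:noinline
    func isNegInf(f float64) bool {
        return math.IsInf(f, -1)
    }

    func main() {
        println(isNegInf(math.Inf(-1)), isNegInf(-1e308)) // true false
    }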

@ -453,7 +453,8 @@ func init() {
{name: "FNMSUBS", argLength: 3, reg: fp31, asm: "FNMSUBS", commutative: true, typ: "Float32"}, // -(arg0 * arg1) - arg2
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS", typ: "Float32"}, // sqrt(arg0)
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS", typ: "Float32"}, // -arg0
{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float
{name: "FMVSX", argLength: 1, reg: gpfp, asm: "FMVSX", typ: "Float32"}, // reinterpret arg0 as float32
{name: "FMVXS", argLength: 1, reg: fpgp, asm: "FMVXS", typ: "Int32"}, // reinterpret arg0 as int32, sign extended to 64 bits
{name: "FCVTSW", argLength: 1, reg: gpfp, asm: "FCVTSW", typ: "Float32"}, // float32(low 32 bits of arg0)
{name: "FCVTSL", argLength: 1, reg: gpfp, asm: "FCVTSL", typ: "Float32"}, // float32(arg0)
{name: "FCVTWS", argLength: 1, reg: fpgp, asm: "FCVTWS", typ: "Int32"}, // int32(arg0)
@ -480,7 +481,8 @@ func init() {
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD", typ: "Float64"}, // -arg0
{name: "FABSD", argLength: 1, reg: fp11, asm: "FABSD", typ: "Float64"}, // abs(arg0)
{name: "FSGNJD", argLength: 2, reg: fp21, asm: "FSGNJD", typ: "Float64"}, // copy sign of arg1 to arg0
{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float
{name: "FMVDX", argLength: 1, reg: gpfp, asm: "FMVDX", typ: "Float64"}, // reinterpret arg0 as float64
{name: "FMVXD", argLength: 1, reg: fpgp, asm: "FMVXD", typ: "Int64"}, // reinterpret arg0 as int64
{name: "FCVTDW", argLength: 1, reg: gpfp, asm: "FCVTDW", typ: "Float64"}, // float64(low 32 bits of arg0)
{name: "FCVTDL", argLength: 1, reg: gpfp, asm: "FCVTDL", typ: "Float64"}, // float64(arg0)
{name: "FCVTWD", argLength: 1, reg: fpgp, asm: "FCVTWD", typ: "Int32"}, // int32(arg0)
@ -495,6 +497,27 @@ func init() {
{name: "FLED", argLength: 2, reg: fp2gp, asm: "FLED"}, // arg0 <= arg1
{name: "LoweredFMIND", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMIND", commutative: true, typ: "Float64"}, // min(arg0, arg1)
{name: "LoweredFMAXD", argLength: 2, reg: fp21, resultNotInArgs: true, asm: "FMAXD", commutative: true, typ: "Float64"}, // max(arg0, arg1)
// Floating point classify (in the F and D extensions).
//
// The FCLASS instructions always set exactly one bit in the output
// register; all other bits are cleared.
//
// Bit | Class
// ====+=============================
// 0 | -∞
// 1 | a negative normal number
// 2 | a negative subnormal number
// 3 | -0
// 4 | +0
// 5 | a positive subnormal number
// 6 | a positive normal number
// 7 | +∞
// 8 | qNaN
// 9 | sNaN
// ====+=============================
{name: "FCLASSS", argLength: 1, reg: fpgp, asm: "FCLASSS", typ: "Int64"}, // classify float32
{name: "FCLASSD", argLength: 1, reg: fpgp, asm: "FCLASSD", typ: "Int64"}, // classify float64
}
RISCV64blocks := []blockData{


@ -84,14 +84,6 @@ func (s *biasedSparseMap) getEntry(i int) (x uint, v int32) {
return
}
// add inserts x->0 into s, provided that x is in the range of keys stored in s.
func (s *biasedSparseMap) add(x uint) {
if int(x) < s.first || int(x) >= s.cap() {
return
}
s.s.set(ID(int(x)-s.first), 0)
}
// set inserts x->v into s, provided that x is in the range of keys stored in s.
func (s *biasedSparseMap) set(x uint, v int32) {
if int(x) < s.first || int(x) >= s.cap() {


@ -436,8 +436,15 @@ func canSpeculativelyExecute(b *Block) bool {
// don't fuse memory ops, Phi ops, divides (can panic),
// or anything else with side-effects
for _, v := range b.Values {
if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) || v.Type.IsMemory() ||
v.MemoryArg() != nil || opcodeTable[v.Op].hasSideEffects {
if v.Op == OpPhi || isDivMod(v.Op) || isPtrArithmetic(v.Op) ||
v.Type.IsMemory() || opcodeTable[v.Op].hasSideEffects {
return false
}
// Allow inlining markers to be speculatively executed
// even though they have a memory argument.
// See issue #74915.
if v.Op != OpInlMark && v.MemoryArg() != nil {
return false
}
}

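A hedged illustration of what the relaxed check enables (the shape is hypothetical; the names are mine): after inlining, an arm whose only memory-taking value is the inlining marker can now be speculated, so the branch may still become a conditional move.

    package main

    // id inlines away but leaves an OpInlMark (which carries a memory
    // argument) in the taken arm. That marker used to make the arm
    // non-speculable; with the change above it no longer blocks
    // branch elimination. See issue #74915.
    func id(x int) int { return x }

    //go:noinline
    func pick(c bool, a, b int) int {
        if c {
            return id(a)
        }
        return b
    }

    func main() { println(pick(true, 1, 2)) } // 1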

@ -574,7 +574,7 @@ func (c *Config) buildRecipes(arch string) {
}
case "loong64":
// - multiply is 4 cycles.
// - add/sub/shift are 1 cycle.
// - add/sub/shift/alsl are 1 cycle.
// On loong64, using a multiply also needs to load the constant into a register.
// TODO: figure out a happy medium.
mulCost = 45
@ -609,6 +609,15 @@ func (c *Config) buildRecipes(arch string) {
return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
})
}
// ADDshiftLLV
for i := 1; i < 5; i++ {
c := 10
r(1, 1<<i, c,
func(m, x, y *Value) *Value {
return m.Block.NewValue2I(m.Pos, OpLOONG64ADDshiftLLV, m.Type, int64(i), x, y)
})
}
}
c.mulRecipes = map[int64]mulRecipe{}
@ -726,7 +735,7 @@ func (c *Config) buildRecipes(arch string) {
// Currently:
// len(c.mulRecipes) == 5984 on arm64
// 680 on amd64
// 5984 on loong64
// 9738 on loong64
// This function takes ~2.5ms on arm64.
//println(len(c.mulRecipes))
}

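The new recipes cover multipliers of the form 1 + 2^i for i = 1..4 (that is, ×3, ×5, ×9, ×17) and compose with the existing shift recipes for larger constants; a worked instance (the constant is chosen for illustration):

    package main

    // 9 = 1 + 2^3, so with the ADDshiftLLV recipe this multiply can
    // become a single ALSLV (x + (x << 3)) on loong64 instead of a
    // constant load plus a 4-cycle MULV.
    //go:noinline
    func mul9(x int64) int64 {
        return x * 9
    }

    func main() {
        println(mul9(7)) // 63
    }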

@ -77,10 +77,6 @@ func (ls *liveSlot) String() string {
return fmt.Sprintf("0x%x.%d.%d", ls.Registers, ls.stackOffsetValue(), int32(ls.StackOffset)&1)
}
func (ls liveSlot) absent() bool {
return ls.Registers == 0 && !ls.onStack()
}
// StackOffset encodes whether a value is on the stack and if so, where.
// It is a 31-bit integer followed by a presence flag at the low-order
// bit.


@ -853,27 +853,6 @@ func (c *registerCursor) plus(regWidth Abi1RO) registerCursor {
return rc
}
// at returns the register cursor for component i of t, where the first
// component is numbered 0.
func (c *registerCursor) at(t *types.Type, i int) registerCursor {
rc := *c
if i == 0 || len(c.regs) == 0 {
return rc
}
if t.IsArray() {
w := c.config.NumParamRegs(t.Elem())
rc.nextSlice += Abi1RO(i * w)
return rc
}
if isStructNotSIMD(t) {
for j := 0; j < i; j++ {
rc.next(t.FieldType(j))
}
return rc
}
panic("Haven't implemented this case yet, do I need to?")
}
func (c *registerCursor) init(regs []abi.RegIndex, info *abi.ABIParamResultInfo, result *[]*Value, storeDest *Value, storeOffset int64) {
c.regs = regs
c.nextSlice = 0
@ -932,17 +911,6 @@ type expandState struct {
indentLevel int // Indentation for debugging recursion
}
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
// that has no 64-bit integer registers.
func (x *expandState) intPairTypes(et types.Kind) (tHi, tLo *types.Type) {
tHi = x.typs.UInt32
if et == types.TINT64 {
tHi = x.typs.Int32
}
tLo = x.typs.UInt32
return
}
// offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.Type) *Value {
ft := from.Type
@ -966,29 +934,6 @@ func (x *expandState) offsetFrom(b *Block, from *Value, offset int64, pt *types.
return b.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
}
func (x *expandState) regWidth(t *types.Type) Abi1RO {
return Abi1RO(x.f.ABI1.NumParamRegs(t))
}
// regOffset returns the register offset of the i'th element of type t
func (x *expandState) regOffset(t *types.Type, i int) Abi1RO {
// TODO maybe cache this in a map if profiling recommends.
if i == 0 {
return 0
}
if t.IsArray() {
return Abi1RO(i) * x.regWidth(t.Elem())
}
if isStructNotSIMD(t) {
k := Abi1RO(0)
for j := 0; j < i; j++ {
k += x.regWidth(t.FieldType(j))
}
return k
}
panic("Haven't implemented this case yet, do I need to?")
}
// prAssignForArg returns the ABIParamAssignment for v, assumed to be an OpArg.
func (x *expandState) prAssignForArg(v *Value) *abi.ABIParamAssignment {
if v.Op != OpArg {


@ -485,53 +485,6 @@ const (
BoundsKindCount
)
// boundsABI determines which register arguments a bounds check call should use. For an [a:b:c] slice, we do:
//
// CMPQ c, cap
// JA fail1
// CMPQ b, c
// JA fail2
// CMPQ a, b
// JA fail3
//
// fail1: CALL panicSlice3Acap (c, cap)
// fail2: CALL panicSlice3B (b, c)
// fail3: CALL panicSlice3C (a, b)
//
// When we register allocate that code, we want the same register to be used for
// the first arg of panicSlice3Acap and the second arg to panicSlice3B. That way,
// initializing that register once will satisfy both calls.
// That desire ends up dividing the set of bounds check calls into 3 sets. This function
// determines which set to use for a given panic call.
// The first arg for set 0 should be the second arg for set 1.
// The first arg for set 1 should be the second arg for set 2.
func boundsABI(b int64) int {
switch BoundsKind(b) {
case BoundsSlice3Alen,
BoundsSlice3AlenU,
BoundsSlice3Acap,
BoundsSlice3AcapU,
BoundsConvert:
return 0
case BoundsSliceAlen,
BoundsSliceAlenU,
BoundsSliceAcap,
BoundsSliceAcapU,
BoundsSlice3B,
BoundsSlice3BU:
return 1
case BoundsIndex,
BoundsIndexU,
BoundsSliceB,
BoundsSliceBU,
BoundsSlice3C,
BoundsSlice3CU:
return 2
default:
panic("bad BoundsKind")
}
}
// Returns the bounds error code needed by the runtime, and
// whether the x field is signed.
func (b BoundsKind) Code() (rtabi.BoundsErrorCode, bool) {


@ -94,8 +94,8 @@ const (
BlockARM64GEnoov
BlockARM64JUMPTABLE
BlockLOONG64EQ
BlockLOONG64NE
BlockLOONG64EQZ
BlockLOONG64NEZ
BlockLOONG64LTZ
BlockLOONG64LEZ
BlockLOONG64GTZ
@ -250,8 +250,8 @@ var blockString = [...]string{
BlockARM64GEnoov: "GEnoov",
BlockARM64JUMPTABLE: "JUMPTABLE",
BlockLOONG64EQ: "EQ",
BlockLOONG64NE: "NE",
BlockLOONG64EQZ: "EQZ",
BlockLOONG64NEZ: "NEZ",
BlockLOONG64LTZ: "LTZ",
BlockLOONG64LEZ: "LEZ",
BlockLOONG64GTZ: "GTZ",
@ -1058,7 +1058,8 @@ const (
OpAMD64CALLtail
OpAMD64CALLclosure
OpAMD64CALLinter
OpAMD64DUFFCOPY
OpAMD64LoweredMove
OpAMD64LoweredMoveLoop
OpAMD64REPMOVSQ
OpAMD64InvertFlags
OpAMD64LoweredGetG
@ -2891,8 +2892,8 @@ const (
OpARM64NotGreaterEqualF
OpARM64LessThanNoov
OpARM64GreaterEqualNoov
OpARM64DUFFZERO
OpARM64LoweredZero
OpARM64LoweredZeroLoop
OpARM64DUFFCOPY
OpARM64LoweredMove
OpARM64LoweredGetClosurePtr
@ -3144,6 +3145,7 @@ const (
OpLOONG64LoweredPanicBoundsCC
OpLOONG64PRELD
OpLOONG64PRELDX
OpLOONG64ADDshiftLLV
OpMIPSADD
OpMIPSADDconst
@ -3627,9 +3629,10 @@ const (
OpPPC64LoweredAtomicOr32
OpPPC64LoweredWB
OpPPC64LoweredPubBarrier
OpPPC64LoweredPanicBoundsA
OpPPC64LoweredPanicBoundsB
OpPPC64LoweredPanicBoundsC
OpPPC64LoweredPanicBoundsRR
OpPPC64LoweredPanicBoundsRC
OpPPC64LoweredPanicBoundsCR
OpPPC64LoweredPanicBoundsCC
OpPPC64InvertFlags
OpPPC64FlagEQ
OpPPC64FlagLT
@ -3774,6 +3777,7 @@ const (
OpRISCV64FSQRTS
OpRISCV64FNEGS
OpRISCV64FMVSX
OpRISCV64FMVXS
OpRISCV64FCVTSW
OpRISCV64FCVTSL
OpRISCV64FCVTWS
@ -3799,6 +3803,7 @@ const (
OpRISCV64FABSD
OpRISCV64FSGNJD
OpRISCV64FMVDX
OpRISCV64FMVXD
OpRISCV64FCVTDW
OpRISCV64FCVTDL
OpRISCV64FCVTWD
@ -3813,6 +3818,8 @@ const (
OpRISCV64FLED
OpRISCV64LoweredFMIND
OpRISCV64LoweredFMAXD
OpRISCV64FCLASSS
OpRISCV64FCLASSD
OpS390XFADDS
OpS390XFADD
@ -17096,17 +17103,35 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "DUFFCOPY",
auxType: auxInt64,
argLen: 3,
clobberFlags: true,
unsafePoint: true,
name: "LoweredMove",
auxType: auxInt64,
argLen: 3,
faultOnNilArg0: true,
faultOnNilArg1: true,
reg: regInfo{
inputs: []inputInfo{
{0, 128}, // DI
{1, 64}, // SI
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 65728, // SI DI X0
clobbers: 1073741824, // X14
},
},
{
name: "LoweredMoveLoop",
auxType: auxInt64,
argLen: 3,
clobberFlags: true,
needIntTemp: true,
faultOnNilArg0: true,
faultOnNilArg1: true,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 1073741824, // X14
clobbersArg0: true,
clobbersArg1: true,
},
},
{
@ -43661,29 +43686,28 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "DUFFZERO",
auxType: auxInt64,
argLen: 2,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{0, 524288}, // R20
},
clobbers: 269156352, // R16 R17 R20 R30
},
},
{
name: "LoweredZero",
argLen: 3,
clobberFlags: true,
auxType: auxInt64,
argLen: 2,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
{0, 65536}, // R16
{1, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
{0, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
clobbers: 65536, // R16
},
},
{
name: "LoweredZeroLoop",
auxType: auxInt64,
argLen: 2,
needIntTemp: true,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
{0, 335544319}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
clobbersArg0: true,
},
},
{
@ -47140,6 +47164,21 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ADDshiftLLV",
auxType: auxInt64,
argLen: 2,
asm: loong64.AALSLV,
reg: regInfo{
inputs: []inputInfo{
{0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
{1, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31
},
outputs: []outputInfo{
{0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31
},
},
},
{
name: "ADD",
@ -53635,41 +53674,46 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 32}, // R5
{1, 64}, // R6
{0, 1016}, // R3 R4 R5 R6 R7 R8 R9
{1, 1016}, // R3 R4 R5 R6 R7 R8 R9
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 16}, // R4
{1, 32}, // R5
{0, 1016}, // R3 R4 R5 R6 R7 R8 R9
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 8}, // R3
{1, 16}, // R4
{0, 1016}, // R3 R4 R5 R6 R7 R8 R9
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "InvertFlags",
argLen: 1,
@ -55600,6 +55644,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMVXS",
argLen: 1,
asm: riscv.AFMVXS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "FCVTSW",
argLen: 1,
@ -55960,6 +56017,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FMVXD",
argLen: 1,
asm: riscv.AFMVXD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "FCVTDW",
argLen: 1,
@ -56158,6 +56228,32 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FCLASSS",
argLen: 1,
asm: riscv.AFCLASSS,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "FCLASSD",
argLen: 1,
asm: riscv.AFCLASSD,
reg: regInfo{
inputs: []inputInfo{
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 1006632944}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20 X21 X22 X23 X24 X25 X26 X28 X29 X30
},
},
},
{
name: "FADDS",


@ -145,10 +145,7 @@ func (l limit) signedMin(m int64) limit {
l.min = max(l.min, m)
return l
}
func (l limit) signedMax(m int64) limit {
l.max = min(l.max, m)
return l
}
func (l limit) signedMinMax(minimum, maximum int64) limit {
l.min = max(l.min, minimum)
l.max = min(l.max, maximum)
@ -1622,7 +1619,16 @@ func initLimit(v *Value) limit {
lim = lim.unsignedMax(1)
// length operations
case OpStringLen, OpSliceLen, OpSliceCap:
case OpSliceLen, OpSliceCap:
f := v.Block.Func
elemSize := uint64(v.Args[0].Type.Elem().Size())
if elemSize > 0 {
heapSize := uint64(1)<<(uint64(f.Config.PtrSize)*8) - 1
maximumElementsFittingInHeap := heapSize / elemSize
lim = lim.unsignedMax(maximumElementsFittingInHeap)
}
fallthrough
case OpStringLen:
lim = lim.signedMin(0)
}

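A worked instance of the new fact (numbers assume 64-bit pointers; the function is illustrative, not from the CL):

    package main

    // For a []int64 the element size is 8, so prove now records
    // len(s) <= (1<<64 - 1) / 8. One consequence of that bound is
    // that the byte count below cannot wrap around uint64.
    //go:noinline
    func byteLen(s []int64) uint64 {
        return uint64(len(s)) * 8
    }

    func main() {
        println(byteLen(make([]int64, 4))) // 32
    }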

@ -561,7 +561,14 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, pos
pos = pos.WithNotStmt()
// Check if v is already in a requested register.
if mask&vi.regs != 0 {
r := pickReg(mask & vi.regs)
mask &= vi.regs
r := pickReg(mask)
if mask.contains(s.SPReg) {
// Prefer the stack pointer if it is allowed.
// (Needed because the op might have an Aux symbol
// that needs SP as its base.)
r = s.SPReg
}
if !s.allocatable.contains(r) {
return v // v is in a fixed register
}
@ -2484,7 +2491,7 @@ func (e *edgeState) processDest(loc Location, vid ID, splice **Value, pos src.XP
}
// Check if we're allowed to clobber the destination location.
if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable {
if len(e.cache[occupant.vid]) == 1 && !e.s.values[occupant.vid].rematerializeable && !opcodeTable[e.s.orig[occupant.vid].Op].fixedReg {
// We can't overwrite the last copy
// of a value that needs to survive.
return false
@ -2988,11 +2995,6 @@ type desiredStateEntry struct {
regs [4]register
}
func (d *desiredState) clear() {
d.entries = d.entries[:0]
d.avoid = 0
}
// get returns a list of desired registers for value vid.
func (d *desiredState) get(vid ID) [4]register {
for _, e := range d.entries {


@ -240,6 +240,30 @@ func TestClobbersArg0(t *testing.T) {
}
}
func TestClobbersArg1(t *testing.T) {
c := testConfig(t)
f := c.Fun("entry",
Bloc("entry",
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
Valu("src", OpArg, c.config.Types.Int64.PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo())),
Valu("dst", OpArg, c.config.Types.Int64.PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo())),
Valu("use1", OpArg, c.config.Types.Int64.PtrTo().PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo().PtrTo())),
Valu("use2", OpArg, c.config.Types.Int64.PtrTo().PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo().PtrTo())),
Valu("move", OpAMD64LoweredMoveLoop, types.TypeMem, 256, nil, "dst", "src", "mem"),
Valu("store1", OpAMD64MOVQstore, types.TypeMem, 0, nil, "use1", "src", "move"),
Valu("store2", OpAMD64MOVQstore, types.TypeMem, 0, nil, "use2", "dst", "store1"),
Exit("store2")))
flagalloc(f.f)
regalloc(f.f)
checkFunc(f.f)
// LoweredMoveLoop clobbers its arguments, so there must be copies of "src" and "dst" somewhere
// so that those values are still available at the stores.
if n := numCopies(f.blocks["entry"]); n != 2 {
fmt.Printf("%s\n", f.f.String())
t.Errorf("got %d copies, want 2", n)
}
}
func numSpills(b *Block) int {
return numOps(b, OpStoreReg)
}


@ -31,6 +31,7 @@ const (
removeDeadValues = true
repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
repMoveThreshold = 1408 // size beyond which we use REP MOVS for copying
)
// deadcode indicates whether rewrite should try to remove any values that become dead.
@ -504,18 +505,6 @@ func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
return n != 0 && n&(n-1) == 0
}
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
return n > 0 && n&(n-1) == 0
}
// isUint32PowerOfTwo reports whether uint32(n) is a power of 2.
func isUint32PowerOfTwo(in int64) bool {
n := uint64(uint32(in))
return n > 0 && n&(n-1) == 0
}
// is32Bit reports whether n can be represented as a signed 32 bit integer.
func is32Bit(n int64) bool {
return n == int64(int32(n))
@ -637,51 +626,16 @@ func truncate64Fto32F(f float64) float32 {
return math.Float32frombits(r)
}
// extend32Fto64F converts a float32 value to a float64 value preserving the bit
// pattern of the mantissa.
func extend32Fto64F(f float32) float64 {
if !math.IsNaN(float64(f)) {
return float64(f)
}
// NaN bit patterns aren't necessarily preserved across conversion
// instructions so we need to do the conversion manually.
b := uint64(math.Float32bits(f))
// | sign | exponent | mantissa |
r := ((b << 32) & (1 << 63)) | (0x7ff << 52) | ((b & 0x7fffff) << (52 - 23))
return math.Float64frombits(r)
}
// DivisionNeedsFixUp reports whether the division needs fix-up code.
func DivisionNeedsFixUp(v *Value) bool {
return v.AuxInt == 0
}
// auxFrom64F encodes a float64 value so it can be stored in an AuxInt.
func auxFrom64F(f float64) int64 {
if f != f {
panic("can't encode a NaN in AuxInt field")
}
return int64(math.Float64bits(f))
}
// auxFrom32F encodes a float32 value so it can be stored in an AuxInt.
func auxFrom32F(f float32) int64 {
if f != f {
panic("can't encode a NaN in AuxInt field")
}
return int64(math.Float64bits(extend32Fto64F(f)))
}
// auxTo32F decodes a float32 from the AuxInt value provided.
func auxTo32F(i int64) float32 {
return truncate64Fto32F(math.Float64frombits(uint64(i)))
}
// auxTo64F decodes a float64 from the AuxInt value provided.
func auxTo64F(i int64) float64 {
return math.Float64frombits(uint64(i))
}
func auxIntToBool(i int64) bool {
if i == 0 {
return false
@ -715,12 +669,6 @@ func auxIntToValAndOff(i int64) ValAndOff {
func auxIntToArm64BitField(i int64) arm64BitField {
return arm64BitField(i)
}
func auxIntToInt128(x int64) int128 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func auxIntToFlagConstant(x int64) flagConstant {
return flagConstant(x)
}
@ -762,12 +710,6 @@ func valAndOffToAuxInt(v ValAndOff) int64 {
func arm64BitFieldToAuxInt(v arm64BitField) int64 {
return int64(v)
}
func int128ToAuxInt(x int128) int64 {
if x != 0 {
panic("nonzero int128 not allowed")
}
return 0
}
func flagConstantToAuxInt(x flagConstant) int64 {
return int64(x)
}
@ -838,23 +780,6 @@ func uaddOvf(a, b int64) bool {
return uint64(a)+uint64(b) < uint64(a)
}
// loadLSymOffset simulates reading a word at an offset into a
// read-only symbol's runtime memory. If it would read a pointer to
// another symbol, that symbol is returned. Otherwise, it returns nil.
func loadLSymOffset(lsym *obj.LSym, offset int64) *obj.LSym {
if lsym.Type != objabi.SRODATA {
return nil
}
for _, r := range lsym.R {
if int64(r.Off) == offset && r.Type&^objabi.R_WEAK == objabi.R_ADDR && r.Add == 0 {
return r.Sym
}
}
return nil
}
func devirtLECall(v *Value, sym *obj.LSym) *Value {
v.Op = OpStaticLECall
auxcall := v.Aux.(*AuxCall)
@ -1576,10 +1501,6 @@ func GetPPC64Shiftmb(auxint int64) int64 {
return int64(int8(auxint >> 8))
}
func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
}
// Test if this value can be encoded as a mask for a rlwinm like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings

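The surviving generic helper subsumes both deleted width-specific ones; a sketch of how call sites translate (the helper body is copied from the hunk above):

    package main

    // isUnsignedPowerOfTwo reports whether n is a power of 2, at
    // whatever unsigned width the caller picks.
    func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
        return n != 0 && n&(n-1) == 0
    }

    func main() {
        c := int32(1 << 20)
        // Old: isUint32PowerOfTwo(int64(c)), truncation hidden inside.
        // New: the width conversion is explicit at the call site.
        println(isUnsignedPowerOfTwo(uint32(c))) // true
    }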

@ -7874,8 +7874,8 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
break
}
// match: (ANDQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31
// result: (BTRQconst [int8(log64(^c))] x)
// cond: isUnsignedPowerOfTwo(uint64(^c)) && uint64(^c) >= 1<<31
// result: (BTRQconst [int8(log64u(uint64(^c)))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVQconst {
@ -7883,11 +7883,11 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(^c) && uint64(^c) >= 1<<31) {
if !(isUnsignedPowerOfTwo(uint64(^c)) && uint64(^c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTRQconst)
v.AuxInt = int8ToAuxInt(int8(log64(^c)))
v.AuxInt = int8ToAuxInt(int8(log64u(uint64(^c))))
v.AddArg(x)
return true
}
@ -19197,8 +19197,8 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
break
}
// match: (ORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTSQconst [int8(log64(c))] x)
// cond: isUnsignedPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31
// result: (BTSQconst [int8(log64u(uint64(c)))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVQconst {
@ -19206,11 +19206,11 @@ func rewriteValueAMD64_OpAMD64ORQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
if !(isUnsignedPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTSQconst)
v.AuxInt = int8ToAuxInt(int8(log64(c)))
v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v.AddArg(x)
return true
}
@ -22164,46 +22164,46 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
break
}
// match: (SETEQ (TESTLconst [c] x))
// cond: isUint32PowerOfTwo(int64(c))
// result: (SETAE (BTLconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SETAE (BTLconst [int8(log32u(uint32(c)))] x))
for {
if v_0.Op != OpAMD64TESTLconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v.reset(OpAMD64SETAE)
v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETEQ (TESTQconst [c] x))
// cond: isUint64PowerOfTwo(int64(c))
// result: (SETAE (BTQconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETAE (BTQconst [int8(log32u(uint32(c)))] x))
for {
if v_0.Op != OpAMD64TESTQconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v.reset(OpAMD64SETAE)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETEQ (TESTQ (MOVQconst [c]) x))
// cond: isUint64PowerOfTwo(c)
// result: (SETAE (BTQconst [int8(log64(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETAE (BTQconst [int8(log64u(uint64(c)))] x))
for {
if v_0.Op != OpAMD64TESTQ {
break
@ -22217,12 +22217,12 @@ func rewriteValueAMD64_OpAMD64SETEQ(v *Value) bool {
}
c := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v.reset(OpAMD64SETAE)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
@ -22641,8 +22641,8 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
break
}
// match: (SETEQstore [off] {sym} ptr (TESTLconst [c] x) mem)
// cond: isUint32PowerOfTwo(int64(c))
// result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SETAEstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -22653,21 +22653,21 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
c := auxIntToInt32(v_1.AuxInt)
x := v_1.Args[0]
mem := v_2
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v.reset(OpAMD64SETAEstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
}
// match: (SETEQstore [off] {sym} ptr (TESTQconst [c] x) mem)
// cond: isUint64PowerOfTwo(int64(c))
// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -22678,21 +22678,21 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
c := auxIntToInt32(v_1.AuxInt)
x := v_1.Args[0]
mem := v_2
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v.reset(OpAMD64SETAEstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
}
// match: (SETEQstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
// cond: isUint64PowerOfTwo(c)
// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETAEstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -22710,14 +22710,14 @@ func rewriteValueAMD64_OpAMD64SETEQstore(v *Value) bool {
c := auxIntToInt64(v_1_0.AuxInt)
x := v_1_1
mem := v_2
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v.reset(OpAMD64SETAEstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
@ -24210,46 +24210,46 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
break
}
// match: (SETNE (TESTLconst [c] x))
// cond: isUint32PowerOfTwo(int64(c))
// result: (SETB (BTLconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SETB (BTLconst [int8(log32u(uint32(c)))] x))
for {
if v_0.Op != OpAMD64TESTLconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v.reset(OpAMD64SETB)
v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETNE (TESTQconst [c] x))
// cond: isUint64PowerOfTwo(int64(c))
// result: (SETB (BTQconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETB (BTQconst [int8(log32u(uint32(c)))] x))
for {
if v_0.Op != OpAMD64TESTQconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v.reset(OpAMD64SETB)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (SETNE (TESTQ (MOVQconst [c]) x))
// cond: isUint64PowerOfTwo(c)
// result: (SETB (BTQconst [int8(log64(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETB (BTQconst [int8(log64u(uint64(c)))] x))
for {
if v_0.Op != OpAMD64TESTQ {
break
@ -24263,12 +24263,12 @@ func rewriteValueAMD64_OpAMD64SETNE(v *Value) bool {
}
c := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v.reset(OpAMD64SETB)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
v.AddArg(v0)
return true
@ -24687,8 +24687,8 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
break
}
// match: (SETNEstore [off] {sym} ptr (TESTLconst [c] x) mem)
// cond: isUint32PowerOfTwo(int64(c))
// result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SETBstore [off] {sym} ptr (BTLconst [int8(log32u(uint32(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -24699,21 +24699,21 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
c := auxIntToInt32(v_1.AuxInt)
x := v_1.Args[0]
mem := v_2
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v.reset(OpAMD64SETBstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
}
// match: (SETNEstore [off] {sym} ptr (TESTQconst [c] x) mem)
// cond: isUint64PowerOfTwo(int64(c))
// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log32u(uint32(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -24724,21 +24724,21 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
c := auxIntToInt32(v_1.AuxInt)
x := v_1.Args[0]
mem := v_2
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v.reset(OpAMD64SETBstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
}
// match: (SETNEstore [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem)
// cond: isUint64PowerOfTwo(c)
// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem)
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (SETBstore [off] {sym} ptr (BTQconst [int8(log64u(uint64(c)))] x) mem)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
@ -24756,14 +24756,14 @@ func rewriteValueAMD64_OpAMD64SETNEstore(v *Value) bool {
c := auxIntToInt64(v_1_0.AuxInt)
x := v_1_1
mem := v_2
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v.reset(OpAMD64SETBstore)
v.AuxInt = int32ToAuxInt(off)
v.Aux = symToAux(sym)
v0 := b.NewValue0(v.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
v.AddArg3(ptr, v0, mem)
return true
@ -29113,8 +29113,8 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
break
}
// match: (XORQ (MOVQconst [c]) x)
// cond: isUint64PowerOfTwo(c) && uint64(c) >= 1<<31
// result: (BTCQconst [int8(log64(c))] x)
// cond: isUnsignedPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31
// result: (BTCQconst [int8(log64u(uint64(c)))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpAMD64MOVQconst {
@ -29122,11 +29122,11 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
}
c := auxIntToInt64(v_0.AuxInt)
x := v_1
if !(isUint64PowerOfTwo(c) && uint64(c) >= 1<<31) {
if !(isUnsignedPowerOfTwo(uint64(c)) && uint64(c) >= 1<<31) {
continue
}
v.reset(OpAMD64BTCQconst)
v.AuxInt = int8ToAuxInt(int8(log64(c)))
v.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v.AddArg(x)
return true
}
@ -44409,75 +44409,6 @@ func rewriteValueAMD64_OpMove(v *Value) bool {
v.AddArg3(dst, v0, mem)
return true
}
// match: (Move [32] dst src mem)
// result: (Move [16] (OffPtr <dst.Type> dst [16]) (OffPtr <src.Type> src [16]) (Move [16] dst src mem))
for {
if auxIntToInt64(v.AuxInt) != 32 {
break
}
dst := v_0
src := v_1
mem := v_2
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(16)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(16)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(16)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem)
v2.AuxInt = int64ToAuxInt(16)
v2.AddArg3(dst, src, mem)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [48] dst src mem)
// result: (Move [32] (OffPtr <dst.Type> dst [16]) (OffPtr <src.Type> src [16]) (Move [16] dst src mem))
for {
if auxIntToInt64(v.AuxInt) != 48 {
break
}
dst := v_0
src := v_1
mem := v_2
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(32)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(16)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(16)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem)
v2.AuxInt = int64ToAuxInt(16)
v2.AddArg3(dst, src, mem)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [64] dst src mem)
// result: (Move [32] (OffPtr <dst.Type> dst [32]) (OffPtr <src.Type> src [32]) (Move [32] dst src mem))
for {
if auxIntToInt64(v.AuxInt) != 64 {
break
}
dst := v_0
src := v_1
mem := v_2
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(32)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(32)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(32)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem)
v2.AuxInt = int64ToAuxInt(32)
v2.AddArg3(dst, src, mem)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [3] dst src mem)
// result: (MOVBstore [2] dst (MOVBload [2] src mem) (MOVWstore dst (MOVWload src mem) mem))
for {
@ -44670,23 +44601,55 @@ func rewriteValueAMD64_OpMove(v *Value) bool {
return true
}
// match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 <= 8
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVQstore dst (MOVQload src mem) mem))
// cond: s > 16 && s < 192 && logLargeCopy(v, s)
// result: (LoweredMove [s] dst src mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 16 && s%16 != 0 && s%16 <= 8) {
if !(s > 16 && s < 192 && logLargeCopy(v, s)) {
break
}
v.reset(OpAMD64LoweredMove)
v.AuxInt = int64ToAuxInt(s)
v.AddArg3(dst, src, mem)
return true
}
// match: (Move [s] dst src mem)
// cond: s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)
// result: (LoweredMoveLoop [s] dst src mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s >= 192 && s <= repMoveThreshold && logLargeCopy(v, s)) {
break
}
v.reset(OpAMD64LoweredMoveLoop)
v.AuxInt = int64ToAuxInt(s)
v.AddArg3(dst, src, mem)
return true
}
// match: (Move [s] dst src mem)
// cond: s > repMoveThreshold && s%8 != 0
// result: (Move [s-s%8] (OffPtr <dst.Type> dst [s%8]) (OffPtr <src.Type> src [s%8]) (MOVQstore dst (MOVQload src mem) mem))
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > repMoveThreshold && s%8 != 0) {
break
}
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(s - s%16)
v.AuxInt = int64ToAuxInt(s - s%8)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(s % 16)
v0.AuxInt = int64ToAuxInt(s % 8)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(s % 16)
v1.AuxInt = int64ToAuxInt(s % 8)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpAMD64MOVQstore, types.TypeMem)
v3 := b.NewValue0(v.Pos, OpAMD64MOVQload, typ.UInt64)
@ -44696,56 +44659,14 @@ func rewriteValueAMD64_OpMove(v *Value) bool {
return true
}
// match: (Move [s] dst src mem)
// cond: s > 16 && s%16 != 0 && s%16 > 8
// result: (Move [s-s%16] (OffPtr <dst.Type> dst [s%16]) (OffPtr <src.Type> src [s%16]) (MOVOstore dst (MOVOload src mem) mem))
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 16 && s%16 != 0 && s%16 > 8) {
break
}
v.reset(OpMove)
v.AuxInt = int64ToAuxInt(s - s%16)
v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type)
v0.AuxInt = int64ToAuxInt(s % 16)
v0.AddArg(dst)
v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type)
v1.AuxInt = int64ToAuxInt(s % 16)
v1.AddArg(src)
v2 := b.NewValue0(v.Pos, OpAMD64MOVOstore, types.TypeMem)
v3 := b.NewValue0(v.Pos, OpAMD64MOVOload, types.TypeInt128)
v3.AddArg2(src, mem)
v2.AddArg3(dst, v3, mem)
v.AddArg3(v0, v1, v2)
return true
}
// match: (Move [s] dst src mem)
// cond: s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)
// result: (DUFFCOPY [s] dst src mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 64 && s <= 16*64 && s%16 == 0 && logLargeCopy(v, s)) {
break
}
v.reset(OpAMD64DUFFCOPY)
v.AuxInt = int64ToAuxInt(s)
v.AddArg3(dst, src, mem)
return true
}
// match: (Move [s] dst src mem)
// cond: s > 16*64 && s%8 == 0 && logLargeCopy(v, s)
// cond: s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)
// result: (REPMOVSQ dst src (MOVQconst [s/8]) mem)
for {
s := auxIntToInt64(v.AuxInt)
dst := v_0
src := v_1
mem := v_2
if !(s > 16*64 && s%8 == 0 && logLargeCopy(v, s)) {
if !(s > repMoveThreshold && s%8 == 0 && logLargeCopy(v, s)) {
break
}
v.reset(OpAMD64REPMOVSQ)
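
Read together, the Move rules above now tier by size (repMoveThreshold is 1408, from the constants in rewrite.go); the sketch below restates the rewrite conditions as plain Go:

    package main

    // moveStrategy mirrors the conditions of the Move rules above;
    // it is a reading aid, not compiler code.
    func moveStrategy(s int64) string {
        const repMoveThreshold = 1408
        switch {
        case s <= 16:
            return "inline loads/stores"
        case s < 192:
            return "LoweredMove (unrolled copy)"
        case s <= repMoveThreshold:
            return "LoweredMoveLoop (generated loop)"
        case s%8 != 0:
            return "peel s%8 bytes, then handle the 8-byte-aligned rest"
        default:
            return "REPMOVSQ"
        }
    }

    func main() {
        println(moveStrategy(64))   // LoweredMove (unrolled copy)
        println(moveStrategy(512))  // LoweredMoveLoop (generated loop)
        println(moveStrategy(4096)) // REPMOVSQ
    }
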
@ -57598,40 +57519,40 @@ func rewriteBlockAMD64(b *Block) bool {
break
}
// match: (EQ (TESTLconst [c] x))
// cond: isUint32PowerOfTwo(int64(c))
// result: (UGE (BTLconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (UGE (BTLconst [int8(log32u(uint32(c)))] x))
for b.Controls[0].Op == OpAMD64TESTLconst {
v_0 := b.Controls[0]
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64UGE, v0)
return true
}
// match: (EQ (TESTQconst [c] x))
// cond: isUint64PowerOfTwo(int64(c))
// result: (UGE (BTQconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (UGE (BTQconst [int8(log32u(uint32(c)))] x))
for b.Controls[0].Op == OpAMD64TESTQconst {
v_0 := b.Controls[0]
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64UGE, v0)
return true
}
// match: (EQ (TESTQ (MOVQconst [c]) x))
// cond: isUint64PowerOfTwo(c)
// result: (UGE (BTQconst [int8(log64(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (UGE (BTQconst [int8(log64u(uint64(c)))] x))
for b.Controls[0].Op == OpAMD64TESTQ {
v_0 := b.Controls[0]
_ = v_0.Args[1]
@ -57643,11 +57564,11 @@ func rewriteBlockAMD64(b *Block) bool {
}
c := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64UGE, v0)
return true
@ -58578,40 +58499,40 @@ func rewriteBlockAMD64(b *Block) bool {
break
}
// match: (NE (TESTLconst [c] x))
// cond: isUint32PowerOfTwo(int64(c))
// result: (ULT (BTLconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (ULT (BTLconst [int8(log32u(uint32(c)))] x))
for b.Controls[0].Op == OpAMD64TESTLconst {
v_0 := b.Controls[0]
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint32PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
break
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTLconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64ULT, v0)
return true
}
// match: (NE (TESTQconst [c] x))
// cond: isUint64PowerOfTwo(int64(c))
// result: (ULT (BTQconst [int8(log32(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (ULT (BTQconst [int8(log32u(uint32(c)))] x))
for b.Controls[0].Op == OpAMD64TESTQconst {
v_0 := b.Controls[0]
c := auxIntToInt32(v_0.AuxInt)
x := v_0.Args[0]
if !(isUint64PowerOfTwo(int64(c))) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
break
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log32(c)))
v0.AuxInt = int8ToAuxInt(int8(log32u(uint32(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64ULT, v0)
return true
}
// match: (NE (TESTQ (MOVQconst [c]) x))
// cond: isUint64PowerOfTwo(c)
// result: (ULT (BTQconst [int8(log64(c))] x))
// cond: isUnsignedPowerOfTwo(uint64(c))
// result: (ULT (BTQconst [int8(log64u(uint64(c)))] x))
for b.Controls[0].Op == OpAMD64TESTQ {
v_0 := b.Controls[0]
_ = v_0.Args[1]
@ -58623,11 +58544,11 @@ func rewriteBlockAMD64(b *Block) bool {
}
c := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(isUint64PowerOfTwo(c)) {
if !(isUnsignedPowerOfTwo(uint64(c))) {
continue
}
v0 := b.NewValue0(v_0.Pos, OpAMD64BTQconst, types.TypeFlags)
v0.AuxInt = int8ToAuxInt(int8(log64(c)))
v0.AuxInt = int8ToAuxInt(int8(log64u(uint64(c))))
v0.AddArg(x)
b.resetWithControl(BlockAMD64ULT, v0)
return true

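The renamed predicates feed the same bit-test lowering as before; a minimal example of the pattern the EQ/SETEQ rules match (illustrative only):

    package main

    // x&(1<<40) == 0 matches (SETEQ (TESTQ (MOVQconst [1<<40]) x));
    // 1<<40 is an unsigned power of two, so the rules above lower it
    // to BTQ $40, x plus a carry-flag check (SETAE) instead of
    // materializing the 64-bit constant for a TESTQ.
    //go:noinline
    func bitClear(x uint64) bool {
        return x&(1<<40) == 0
    }

    func main() {
        println(bitClear(0), bitClear(1<<40)) // true false
    }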

@ -22321,141 +22321,34 @@ func rewriteValueARM64_OpZero(v *Value) bool {
v.AddArg4(ptr, v0, v0, mem)
return true
}
// match: (Zero [32] ptr mem)
// result: (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))
for {
if auxIntToInt64(v.AuxInt) != 32 {
break
}
ptr := v_0
mem := v_1
v.reset(OpARM64STP)
v.AuxInt = int32ToAuxInt(16)
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(0)
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v1.AuxInt = int32ToAuxInt(0)
v1.AddArg4(ptr, v0, v0, mem)
v.AddArg4(ptr, v0, v0, v1)
return true
}
// match: (Zero [48] ptr mem)
// result: (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem)))
for {
if auxIntToInt64(v.AuxInt) != 48 {
break
}
ptr := v_0
mem := v_1
v.reset(OpARM64STP)
v.AuxInt = int32ToAuxInt(32)
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(0)
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v1.AuxInt = int32ToAuxInt(16)
v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v2.AuxInt = int32ToAuxInt(0)
v2.AddArg4(ptr, v0, v0, mem)
v1.AddArg4(ptr, v0, v0, v2)
v.AddArg4(ptr, v0, v0, v1)
return true
}
// match: (Zero [64] ptr mem)
// result: (STP [48] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [32] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [16] ptr (MOVDconst [0]) (MOVDconst [0]) (STP [0] ptr (MOVDconst [0]) (MOVDconst [0]) mem))))
for {
if auxIntToInt64(v.AuxInt) != 64 {
break
}
ptr := v_0
mem := v_1
v.reset(OpARM64STP)
v.AuxInt = int32ToAuxInt(48)
v0 := b.NewValue0(v.Pos, OpARM64MOVDconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(0)
v1 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v1.AuxInt = int32ToAuxInt(32)
v2 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v2.AuxInt = int32ToAuxInt(16)
v3 := b.NewValue0(v.Pos, OpARM64STP, types.TypeMem)
v3.AuxInt = int32ToAuxInt(0)
v3.AddArg4(ptr, v0, v0, mem)
v2.AddArg4(ptr, v0, v0, v3)
v1.AddArg4(ptr, v0, v0, v2)
v.AddArg4(ptr, v0, v0, v1)
return true
}
// match: (Zero [s] ptr mem)
// cond: s%16 != 0 && s%16 <= 8 && s > 16
// result: (Zero [8] (OffPtr <ptr.Type> ptr [s-8]) (Zero [s-s%16] ptr mem))
// cond: s > 16 && s < 192
// result: (LoweredZero [s] ptr mem)
for {
s := auxIntToInt64(v.AuxInt)
ptr := v_0
mem := v_1
if !(s%16 != 0 && s%16 <= 8 && s > 16) {
if !(s > 16 && s < 192) {
break
}
v.reset(OpZero)
v.AuxInt = int64ToAuxInt(8)
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
v0.AuxInt = int64ToAuxInt(s - 8)
v0.AddArg(ptr)
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
v1.AuxInt = int64ToAuxInt(s - s%16)
v1.AddArg2(ptr, mem)
v.AddArg2(v0, v1)
return true
}
// match: (Zero [s] ptr mem)
// cond: s%16 != 0 && s%16 > 8 && s > 16
// result: (Zero [16] (OffPtr <ptr.Type> ptr [s-16]) (Zero [s-s%16] ptr mem))
for {
s := auxIntToInt64(v.AuxInt)
ptr := v_0
mem := v_1
if !(s%16 != 0 && s%16 > 8 && s > 16) {
break
}
v.reset(OpZero)
v.AuxInt = int64ToAuxInt(16)
v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type)
v0.AuxInt = int64ToAuxInt(s - 16)
v0.AddArg(ptr)
v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem)
v1.AuxInt = int64ToAuxInt(s - s%16)
v1.AddArg2(ptr, mem)
v.AddArg2(v0, v1)
return true
}
// match: (Zero [s] ptr mem)
// cond: s%16 == 0 && s > 64 && s <= 16*64
// result: (DUFFZERO [4 * (64 - s/16)] ptr mem)
for {
s := auxIntToInt64(v.AuxInt)
ptr := v_0
mem := v_1
if !(s%16 == 0 && s > 64 && s <= 16*64) {
break
}
v.reset(OpARM64DUFFZERO)
v.AuxInt = int64ToAuxInt(4 * (64 - s/16))
v.reset(OpARM64LoweredZero)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(ptr, mem)
return true
}
// match: (Zero [s] ptr mem)
// cond: s%16 == 0 && s > 16*64
// result: (LoweredZero ptr (ADDconst <ptr.Type> [s-16] ptr) mem)
// cond: s >= 192
// result: (LoweredZeroLoop [s] ptr mem)
for {
s := auxIntToInt64(v.AuxInt)
ptr := v_0
mem := v_1
if !(s%16 == 0 && s > 16*64) {
if !(s >= 192) {
break
}
v.reset(OpARM64LoweredZero)
v0 := b.NewValue0(v.Pos, OpARM64ADDconst, ptr.Type)
v0.AuxInt = int64ToAuxInt(s - 16)
v0.AddArg(ptr)
v.AddArg3(ptr, v0, mem)
v.reset(OpARM64LoweredZeroLoop)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(ptr, mem)
return true
}
return false

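The arm64 zeroing tiers now mirror the amd64 copy tiers: up to 16 bytes inline, 16 < s < 192 LoweredZero (unrolled), s >= 192 LoweredZeroLoop; a minimal example:

    package main

    // Zeroing 256 bytes satisfies s >= 192, so with the rules above
    // this compiles on arm64 to LoweredZeroLoop, a generated store
    // loop, instead of a jump into the old DUFFZERO table.
    //go:noinline
    func clearBuf(p *[256]byte) {
        *p = [256]byte{}
    }

    func main() {
        var b [256]byte
        b[13] = 1
        clearBuf(&b)
        println(b[13]) // 0
    }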

@ -5539,6 +5539,7 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (MULV _ (MOVVconst [0]))
// result: (MOVVconst [0])
for {
@ -5583,6 +5584,44 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
}
break
}
// match: (MULV (NEGV x) (MOVVconst [c]))
// result: (MULV x (MOVVconst [-c]))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpLOONG64NEGV {
continue
}
x := v_0.Args[0]
if v_1.Op != OpLOONG64MOVVconst {
continue
}
c := auxIntToInt64(v_1.AuxInt)
v.reset(OpLOONG64MULV)
v0 := b.NewValue0(v.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(-c)
v.AddArg2(x, v0)
return true
}
break
}
// match: (MULV (NEGV x) (NEGV y))
// result: (MULV x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpLOONG64NEGV {
continue
}
x := v_0.Args[0]
if v_1.Op != OpLOONG64NEGV {
continue
}
y := v_1.Args[0]
v.reset(OpLOONG64MULV)
v.AddArg2(x, y)
return true
}
break
}
// match: (MULV (MOVVconst [c]) (MOVVconst [d]))
// result: (MOVVconst [c*d])
for {
@ -11440,8 +11479,124 @@ func rewriteValueLOONG64_OpZero(v *Value) bool {
func rewriteBlockLOONG64(b *Block) bool {
typ := &b.Func.Config.Types
switch b.Kind {
case BlockLOONG64EQ:
// match: (EQ (FPFlagTrue cmp) yes no)
case BlockLOONG64BEQ:
// match: (BEQ (MOVVconst [0]) cond yes no)
// result: (EQZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64EQZ, cond)
return true
}
// match: (BEQ cond (MOVVconst [0]) yes no)
// result: (EQZ cond yes no)
for b.Controls[1].Op == OpLOONG64MOVVconst {
cond := b.Controls[0]
v_1 := b.Controls[1]
if auxIntToInt64(v_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64EQZ, cond)
return true
}
case BlockLOONG64BGE:
// match: (BGE (MOVVconst [0]) cond yes no)
// result: (LEZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64LEZ, cond)
return true
}
// match: (BGE cond (MOVVconst [0]) yes no)
// result: (GEZ cond yes no)
for b.Controls[1].Op == OpLOONG64MOVVconst {
cond := b.Controls[0]
v_1 := b.Controls[1]
if auxIntToInt64(v_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64GEZ, cond)
return true
}
case BlockLOONG64BGEU:
// match: (BGEU (MOVVconst [0]) cond yes no)
// result: (EQZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64EQZ, cond)
return true
}
case BlockLOONG64BLT:
// match: (BLT (MOVVconst [0]) cond yes no)
// result: (GTZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64GTZ, cond)
return true
}
// match: (BLT cond (MOVVconst [0]) yes no)
// result: (LTZ cond yes no)
for b.Controls[1].Op == OpLOONG64MOVVconst {
cond := b.Controls[0]
v_1 := b.Controls[1]
if auxIntToInt64(v_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64LTZ, cond)
return true
}
case BlockLOONG64BLTU:
// match: (BLTU (MOVVconst [0]) cond yes no)
// result: (NEZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64NEZ, cond)
return true
}
case BlockLOONG64BNE:
// match: (BNE (MOVVconst [0]) cond yes no)
// result: (NEZ cond yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
cond := b.Controls[1]
b.resetWithControl(BlockLOONG64NEZ, cond)
return true
}
// match: (BNE cond (MOVVconst [0]) yes no)
// result: (NEZ cond yes no)
for b.Controls[1].Op == OpLOONG64MOVVconst {
cond := b.Controls[0]
v_1 := b.Controls[1]
if auxIntToInt64(v_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64NEZ, cond)
return true
}
case BlockLOONG64EQZ:
// match: (EQZ (FPFlagTrue cmp) yes no)
// result: (FPF cmp yes no)
for b.Controls[0].Op == OpLOONG64FPFlagTrue {
v_0 := b.Controls[0]
@ -11449,7 +11604,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64FPF, cmp)
return true
}
// match: (EQZ (FPFlagFalse cmp) yes no)
// result: (FPT cmp yes no)
for b.Controls[0].Op == OpLOONG64FPFlagFalse {
v_0 := b.Controls[0]
@ -11457,8 +11612,8 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64FPT, cmp)
return true
}
// match: (EQZ (XORconst [1] cmp:(SGT _ _)) yes no)
// result: (NEZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11468,11 +11623,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGT {
break
}
b.resetWithControl(BlockLOONG64NEZ, cmp)
return true
}
// match: (EQZ (XORconst [1] cmp:(SGTU _ _)) yes no)
// result: (NEZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11482,11 +11637,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTU {
break
}
b.resetWithControl(BlockLOONG64NEZ, cmp)
return true
}
// match: (EQZ (XORconst [1] cmp:(SGTconst _)) yes no)
// result: (NEZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11496,11 +11651,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTconst {
break
}
b.resetWithControl(BlockLOONG64NEZ, cmp)
return true
}
// match: (EQZ (XORconst [1] cmp:(SGTUconst _)) yes no)
// result: (NEZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11510,22 +11665,22 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTUconst {
break
}
b.resetWithControl(BlockLOONG64NEZ, cmp)
return true
}
// match: (EQZ (SGTUconst [1] x) yes no)
// result: (NEZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTUconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
b.resetWithControl(BlockLOONG64NEZ, x)
return true
}
// match: (EQZ (SGTU x (MOVVconst [0])) yes no)
// result: (EQZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
_ = v_0.Args[1]
@ -11534,10 +11689,10 @@ func rewriteBlockLOONG64(b *Block) bool {
if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64EQZ, x)
return true
}
// match: (EQZ (SGTconst [0] x) yes no)
// result: (GEZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTconst {
v_0 := b.Controls[0]
@ -11548,7 +11703,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64GEZ, x)
return true
}
// match: (EQZ (SGT x (MOVVconst [0])) yes no)
// result: (LEZ x yes no)
for b.Controls[0].Op == OpLOONG64SGT {
v_0 := b.Controls[0]
@ -11561,9 +11716,9 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64LEZ, x)
return true
}
// match: (EQZ (SGTU (MOVVconst [c]) y) yes no)
// cond: c >= -2048 && c <= 2047
// result: (EQZ (SGTUconst [c] y) yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
y := v_0.Args[1]
@ -11578,10 +11733,10 @@ func rewriteBlockLOONG64(b *Block) bool {
v0 := b.NewValue0(v_0.Pos, OpLOONG64SGTUconst, typ.Bool)
v0.AuxInt = int64ToAuxInt(c)
v0.AddArg(y)
b.resetWithControl(BlockLOONG64EQZ, v0)
return true
}
// match: (EQZ (SUBV x y) yes no)
// result: (BEQ x y yes no)
for b.Controls[0].Op == OpLOONG64SUBV {
v_0 := b.Controls[0]
@ -11590,7 +11745,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BEQ, x, y)
return true
}
// match: (EQZ (SGT x y) yes no)
// result: (BGE y x yes no)
for b.Controls[0].Op == OpLOONG64SGT {
v_0 := b.Controls[0]
@ -11599,7 +11754,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BGE, y, x)
return true
}
// match: (EQZ (SGTU x y) yes no)
// result: (BGEU y x yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
@ -11608,7 +11763,29 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BGEU, y, x)
return true
}
// match: (EQZ (SGTconst [c] y) yes no)
// result: (BGE y (MOVVconst [c]) yes no)
for b.Controls[0].Op == OpLOONG64SGTconst {
v_0 := b.Controls[0]
c := auxIntToInt64(v_0.AuxInt)
y := v_0.Args[0]
v0 := b.NewValue0(b.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(c)
b.resetWithControl2(BlockLOONG64BGE, y, v0)
return true
}
// match: (EQZ (SGTUconst [c] y) yes no)
// result: (BGEU y (MOVVconst [c]) yes no)
for b.Controls[0].Op == OpLOONG64SGTUconst {
v_0 := b.Controls[0]
c := auxIntToInt64(v_0.AuxInt)
y := v_0.Args[0]
v0 := b.NewValue0(b.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(c)
b.resetWithControl2(BlockLOONG64BGEU, y, v0)
return true
}
// match: (EQZ (MOVVconst [0]) yes no)
// result: (First yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
@ -11618,7 +11795,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.Reset(BlockFirst)
return true
}
// match: (EQZ (MOVVconst [c]) yes no)
// cond: c != 0
// result: (First no yes)
for b.Controls[0].Op == OpLOONG64MOVVconst {
@ -11631,6 +11808,14 @@ func rewriteBlockLOONG64(b *Block) bool {
b.swapSuccessors()
return true
}
// match: (EQZ (NEGV x) yes no)
// result: (EQZ x yes no)
for b.Controls[0].Op == OpLOONG64NEGV {
v_0 := b.Controls[0]
x := v_0.Args[0]
b.resetWithControl(BlockLOONG64EQZ, x)
return true
}
case BlockLOONG64GEZ:
// match: (GEZ (MOVVconst [c]) yes no)
// cond: c >= 0
@ -11685,12 +11870,12 @@ func rewriteBlockLOONG64(b *Block) bool {
}
case BlockIf:
// match: (If cond yes no)
// result: (NEZ (MOVBUreg <typ.UInt64> cond) yes no)
for {
cond := b.Controls[0]
v0 := b.NewValue0(cond.Pos, OpLOONG64MOVBUreg, typ.UInt64)
v0.AddArg(cond)
b.resetWithControl(BlockLOONG64NEZ, v0)
return true
}
case BlockLOONG64LEZ:
@ -11745,8 +11930,8 @@ func rewriteBlockLOONG64(b *Block) bool {
b.swapSuccessors()
return true
}
case BlockLOONG64NEZ:
// match: (NEZ (FPFlagTrue cmp) yes no)
// result: (FPT cmp yes no)
for b.Controls[0].Op == OpLOONG64FPFlagTrue {
v_0 := b.Controls[0]
@ -11754,7 +11939,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64FPT, cmp)
return true
}
// match: (NEZ (FPFlagFalse cmp) yes no)
// result: (FPF cmp yes no)
for b.Controls[0].Op == OpLOONG64FPFlagFalse {
v_0 := b.Controls[0]
@ -11762,8 +11947,8 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64FPF, cmp)
return true
}
// match: (NEZ (XORconst [1] cmp:(SGT _ _)) yes no)
// result: (EQZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11773,11 +11958,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGT {
break
}
b.resetWithControl(BlockLOONG64EQZ, cmp)
return true
}
// match: (NEZ (XORconst [1] cmp:(SGTU _ _)) yes no)
// result: (EQZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11787,11 +11972,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTU {
break
}
b.resetWithControl(BlockLOONG64EQZ, cmp)
return true
}
// match: (NEZ (XORconst [1] cmp:(SGTconst _)) yes no)
// result: (EQZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11801,11 +11986,11 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTconst {
break
}
b.resetWithControl(BlockLOONG64EQZ, cmp)
return true
}
// match: (NEZ (XORconst [1] cmp:(SGTUconst _)) yes no)
// result: (EQZ cmp yes no)
for b.Controls[0].Op == OpLOONG64XORconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
@ -11815,22 +12000,22 @@ func rewriteBlockLOONG64(b *Block) bool {
if cmp.Op != OpLOONG64SGTUconst {
break
}
b.resetWithControl(BlockLOONG64EQZ, cmp)
return true
}
// match: (NEZ (SGTUconst [1] x) yes no)
// result: (EQZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTUconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 1 {
break
}
x := v_0.Args[0]
b.resetWithControl(BlockLOONG64EQZ, x)
return true
}
// match: (NEZ (SGTU x (MOVVconst [0])) yes no)
// result: (NEZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
_ = v_0.Args[1]
@ -11839,10 +12024,10 @@ func rewriteBlockLOONG64(b *Block) bool {
if v_0_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_0_1.AuxInt) != 0 {
break
}
b.resetWithControl(BlockLOONG64NEZ, x)
return true
}
// match: (NEZ (SGTconst [0] x) yes no)
// result: (LTZ x yes no)
for b.Controls[0].Op == OpLOONG64SGTconst {
v_0 := b.Controls[0]
@ -11853,7 +12038,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64LTZ, x)
return true
}
// match: (NEZ (SGT x (MOVVconst [0])) yes no)
// result: (GTZ x yes no)
for b.Controls[0].Op == OpLOONG64SGT {
v_0 := b.Controls[0]
@ -11866,9 +12051,9 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl(BlockLOONG64GTZ, x)
return true
}
// match: (NEZ (SGTU (MOVVconst [c]) y) yes no)
// cond: c >= -2048 && c <= 2047
// result: (NEZ (SGTUconst [c] y) yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
y := v_0.Args[1]
@ -11883,10 +12068,10 @@ func rewriteBlockLOONG64(b *Block) bool {
v0 := b.NewValue0(v_0.Pos, OpLOONG64SGTUconst, typ.Bool)
v0.AuxInt = int64ToAuxInt(c)
v0.AddArg(y)
b.resetWithControl(BlockLOONG64NEZ, v0)
return true
}
// match: (NEZ (SUBV x y) yes no)
// result: (BNE x y yes no)
for b.Controls[0].Op == OpLOONG64SUBV {
v_0 := b.Controls[0]
@ -11895,7 +12080,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BNE, x, y)
return true
}
// match: (NEZ (SGT x y) yes no)
// result: (BLT y x yes no)
for b.Controls[0].Op == OpLOONG64SGT {
v_0 := b.Controls[0]
@ -11904,7 +12089,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BLT, y, x)
return true
}
// match: (NEZ (SGTU x y) yes no)
// result: (BLTU y x yes no)
for b.Controls[0].Op == OpLOONG64SGTU {
v_0 := b.Controls[0]
@ -11913,7 +12098,29 @@ func rewriteBlockLOONG64(b *Block) bool {
b.resetWithControl2(BlockLOONG64BLTU, y, x)
return true
}
// match: (NEZ (SGTconst [c] y) yes no)
// result: (BLT y (MOVVconst [c]) yes no)
for b.Controls[0].Op == OpLOONG64SGTconst {
v_0 := b.Controls[0]
c := auxIntToInt64(v_0.AuxInt)
y := v_0.Args[0]
v0 := b.NewValue0(b.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(c)
b.resetWithControl2(BlockLOONG64BLT, y, v0)
return true
}
// match: (NEZ (SGTUconst [c] y) yes no)
// result: (BLTU y (MOVVconst [c]) yes no)
for b.Controls[0].Op == OpLOONG64SGTUconst {
v_0 := b.Controls[0]
c := auxIntToInt64(v_0.AuxInt)
y := v_0.Args[0]
v0 := b.NewValue0(b.Pos, OpLOONG64MOVVconst, typ.UInt64)
v0.AuxInt = int64ToAuxInt(c)
b.resetWithControl2(BlockLOONG64BLTU, y, v0)
return true
}
// match: (NEZ (MOVVconst [0]) yes no)
// result: (First no yes)
for b.Controls[0].Op == OpLOONG64MOVVconst {
v_0 := b.Controls[0]
@ -11924,7 +12131,7 @@ func rewriteBlockLOONG64(b *Block) bool {
b.swapSuccessors()
return true
}
// match: (NEZ (MOVVconst [c]) yes no)
// cond: c != 0
// result: (First yes no)
for b.Controls[0].Op == OpLOONG64MOVVconst {
@ -11936,6 +12143,14 @@ func rewriteBlockLOONG64(b *Block) bool {
b.Reset(BlockFirst)
return true
}
// match: (NEZ (NEGV x) yes no)
// result: (NEZ x yes no)
for b.Controls[0].Op == OpLOONG64NEGV {
v_0 := b.Controls[0]
x := v_0.Args[0]
b.resetWithControl(BlockLOONG64NEZ, x)
return true
}
}
return false
}


@ -25,5 +25,37 @@ func rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v *Value) bool {
return false
}
func rewriteBlockLOONG64latelower(b *Block) bool {
switch b.Kind {
case BlockLOONG64EQZ:
// match: (EQZ (XOR x y) yes no)
// result: (BEQ x y yes no)
for b.Controls[0].Op == OpLOONG64XOR {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
x := v_0_0
y := v_0_1
b.resetWithControl2(BlockLOONG64BEQ, x, y)
return true
}
}
case BlockLOONG64NEZ:
// match: (NEZ (XOR x y) yes no)
// result: (BNE x y yes no)
for b.Controls[0].Op == OpLOONG64XOR {
v_0 := b.Controls[0]
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
x := v_0_0
y := v_0_1
b.resetWithControl2(BlockLOONG64BNE, x, y)
return true
}
}
}
return false
}
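// Illustrative sketch, not part of the merge: the EQZ/NEZ-of-XOR rules
// above fuse an integer equality test into a single conditional branch,
// so on loong64 a comparison like the one below now lowers to BEQ a, b
// instead of an XOR followed by a branch-on-zero. The function name is
// hypothetical, for illustration only.
func exampleEq(a, b int64) int64 {
	if a == b { // SSA: EQZ (XOR a b) yes no => BEQ a b yes no
		return 1
	}
	return 0
}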


@ -540,6 +540,12 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpPPC64LessEqual(v)
case OpPPC64LessThan:
return rewriteValuePPC64_OpPPC64LessThan(v)
case OpPPC64LoweredPanicBoundsCR:
return rewriteValuePPC64_OpPPC64LoweredPanicBoundsCR(v)
case OpPPC64LoweredPanicBoundsRC:
return rewriteValuePPC64_OpPPC64LoweredPanicBoundsRC(v)
case OpPPC64LoweredPanicBoundsRR:
return rewriteValuePPC64_OpPPC64LoweredPanicBoundsRR(v)
case OpPPC64MFVSRD:
return rewriteValuePPC64_OpPPC64MFVSRD(v)
case OpPPC64MOVBZload:
@ -667,7 +673,8 @@ func rewriteValuePPC64(v *Value) bool {
case OpPPC64XORconst:
return rewriteValuePPC64_OpPPC64XORconst(v)
case OpPanicBounds:
v.Op = OpPPC64LoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValuePPC64_OpPopCount16(v)
case OpPopCount32:
@ -6826,6 +6833,86 @@ func rewriteValuePPC64_OpPPC64LessThan(v *Value) bool {
return true
}
}
func rewriteValuePPC64_OpPPC64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpPPC64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpPPC64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpPPC64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpPPC64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpPPC64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpPPC64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpPPC64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
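// Illustrative sketch, not part of the merge: the three rewrites above
// fold constant operands of a bounds-check panic stepwise. A
// register/register form (RR) with one constant operand becomes
// register/constant (RC or CR), and once the remaining operand is also a
// constant the pair collapses into the fully-constant CC form carried in
// the aux. The same idea on plain values (hypothetical helper, for
// illustration only):
func foldPanicBoundsKind(xIsConst, yIsConst bool) string {
	switch {
	case xIsConst && yIsConst:
		return "CC" // both operands known at compile time
	case xIsConst:
		return "CR" // constant x, register y
	case yIsConst:
		return "RC" // register x, constant y
	}
	return "RR" // nothing to fold
}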
func rewriteValuePPC64_OpPPC64MFVSRD(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -12981,60 +13068,6 @@ func rewriteValuePPC64_OpPPC64XORconst(v *Value) bool {
}
return false
}
func rewriteValuePPC64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpPPC64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpPPC64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpPPC64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValuePPC64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block


@ -513,14 +513,30 @@ func rewriteValueRISCV64(v *Value) bool {
return rewriteValueRISCV64_OpRISCV64FADDD(v)
case OpRISCV64FADDS:
return rewriteValueRISCV64_OpRISCV64FADDS(v)
case OpRISCV64FEQD:
return rewriteValueRISCV64_OpRISCV64FEQD(v)
case OpRISCV64FLED:
return rewriteValueRISCV64_OpRISCV64FLED(v)
case OpRISCV64FLTD:
return rewriteValueRISCV64_OpRISCV64FLTD(v)
case OpRISCV64FMADDD:
return rewriteValueRISCV64_OpRISCV64FMADDD(v)
case OpRISCV64FMADDS:
return rewriteValueRISCV64_OpRISCV64FMADDS(v)
case OpRISCV64FMOVDload:
return rewriteValueRISCV64_OpRISCV64FMOVDload(v)
case OpRISCV64FMOVDstore:
return rewriteValueRISCV64_OpRISCV64FMOVDstore(v)
case OpRISCV64FMOVWload:
return rewriteValueRISCV64_OpRISCV64FMOVWload(v)
case OpRISCV64FMOVWstore:
return rewriteValueRISCV64_OpRISCV64FMOVWstore(v)
case OpRISCV64FMSUBD:
return rewriteValueRISCV64_OpRISCV64FMSUBD(v)
case OpRISCV64FMSUBS:
return rewriteValueRISCV64_OpRISCV64FMSUBS(v)
case OpRISCV64FNED:
return rewriteValueRISCV64_OpRISCV64FNED(v)
case OpRISCV64FNMADDD:
return rewriteValueRISCV64_OpRISCV64FNMADDD(v)
case OpRISCV64FNMADDS:
@ -3754,6 +3770,149 @@ func rewriteValueRISCV64_OpRISCV64FADDS(v *Value) bool {
}
return false
}
func rewriteValueRISCV64_OpRISCV64FEQD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))])))
// result: (ANDI [1] (FCLASSD x))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(-1))) {
continue
}
v.reset(OpRISCV64ANDI)
v.AuxInt = int64ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v0.AddArg(x)
v.AddArg(v0)
return true
}
break
}
// match: (FEQD x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))])))
// result: (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(1))) {
continue
}
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
return false
}
func rewriteValueRISCV64_OpRISCV64FLED(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FLED (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])) x)
// result: (SNEZ (ANDI <typ.Int64> [0xff &^ 1] (FCLASSD x)))
for {
if v_0.Op != OpRISCV64FMVDX {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0_0.AuxInt) != int64(math.Float64bits(-math.MaxFloat64)) {
break
}
x := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(0xff &^ 1)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
// match: (FLED x (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])))
// result: (SNEZ (ANDI <typ.Int64> [0xff &^ (1<<7)] (FCLASSD x)))
for {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
break
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.MaxFloat64)) {
break
}
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(0xff &^ (1 << 7))
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
return false
}
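// Note (illustrative, not part of the merge): FCLASSD sets exactly one of
// ten class bits (0 = -Inf, 1..6 = finite values and zeros, 7 = +Inf,
// 8..9 = NaN). Masking with 0xff&^1 therefore accepts every value that is
// >= -MaxFloat64 while rejecting both -Inf and NaN, and 0xff&^(1<<7) is
// the mirror image for <= MaxFloat64.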
func rewriteValueRISCV64_OpRISCV64FLTD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FLTD x (FMVDX (MOVDconst [int64(math.Float64bits(-math.MaxFloat64))])))
// result: (ANDI [1] (FCLASSD x))
for {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
break
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(-math.MaxFloat64)) {
break
}
v.reset(OpRISCV64ANDI)
v.AuxInt = int64ToAuxInt(1)
v0 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v0.AddArg(x)
v.AddArg(v0)
return true
}
// match: (FLTD (FMVDX (MOVDconst [int64(math.Float64bits(math.MaxFloat64))])) x)
// result: (SNEZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
if v_0.Op != OpRISCV64FMVDX {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_0_0.AuxInt) != int64(math.Float64bits(math.MaxFloat64)) {
break
}
x := v_1
v.reset(OpRISCV64SNEZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMADDD(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@ -3844,6 +4003,250 @@ func rewriteValueRISCV64_OpRISCV64FMADDS(v *Value) bool {
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVDload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVDload)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(base, mem)
return true
}
// match: (FMOVDload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVDload [off1+int32(off2)] {sym} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVDload)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(base, mem)
return true
}
// match: (FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVDX x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64MOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVDX)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVDstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVDstore)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg3(base, val, mem)
return true
}
// match: (FMOVDstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVDstore [off1+int32(off2)] {sym} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVDstore)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg3(base, val, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVWload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVWload)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg2(base, mem)
return true
}
// match: (FMOVWload [off1] {sym} (ADDI [off2] base) mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVWload [off1+int32(off2)] {sym} base mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
mem := v_1
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVWload)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg2(base, mem)
return true
}
// match: (FMOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVSX x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVSX)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMOVWstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
// match: (FMOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (FMOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym1 := auxToSym(v.Aux)
if v_0.Op != OpRISCV64MOVaddr {
break
}
off2 := auxIntToInt32(v_0.AuxInt)
sym2 := auxToSym(v_0.Aux)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)) {
break
}
v.reset(OpRISCV64FMOVWstore)
v.AuxInt = int32ToAuxInt(off1 + off2)
v.Aux = symToAux(mergeSym(sym1, sym2))
v.AddArg3(base, val, mem)
return true
}
// match: (FMOVWstore [off1] {sym} (ADDI [off2] base) val mem)
// cond: is32Bit(int64(off1)+off2)
// result: (FMOVWstore [off1+int32(off2)] {sym} base val mem)
for {
off1 := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
if v_0.Op != OpRISCV64ADDI {
break
}
off2 := auxIntToInt64(v_0.AuxInt)
base := v_0.Args[0]
val := v_1
mem := v_2
if !(is32Bit(int64(off1) + off2)) {
break
}
v.reset(OpRISCV64FMOVWstore)
v.AuxInt = int32ToAuxInt(off1 + int32(off2))
v.Aux = symToAux(sym)
v.AddArg3(base, val, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64FMSUBD(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@ -3934,6 +4337,59 @@ func rewriteValueRISCV64_OpRISCV64FMSUBS(v *Value) bool {
}
return false
}
func rewriteValueRISCV64_OpRISCV64FNED(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(-1)))])))
// result: (SEQZ (ANDI <typ.Int64> [1] (FCLASSD x)))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(-1))) {
continue
}
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
// match: (FNED x (FMVDX (MOVDconst [int64(math.Float64bits(math.Inf(1)))])))
// result: (SEQZ (ANDI <typ.Int64> [1<<7] (FCLASSD x)))
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpRISCV64FMVDX {
continue
}
v_1_0 := v_1.Args[0]
if v_1_0.Op != OpRISCV64MOVDconst || auxIntToInt64(v_1_0.AuxInt) != int64(math.Float64bits(math.Inf(1))) {
continue
}
v.reset(OpRISCV64SEQZ)
v0 := b.NewValue0(v.Pos, OpRISCV64ANDI, typ.Int64)
v0.AuxInt = int64ToAuxInt(1 << 7)
v1 := b.NewValue0(v.Pos, OpRISCV64FCLASSD, typ.Int64)
v1.AddArg(x)
v0.AddArg(v1)
v.AddArg(v0)
return true
}
break
}
return false
}
func rewriteValueRISCV64_OpRISCV64FNMADDD(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@ -4977,6 +5433,25 @@ func rewriteValueRISCV64_OpRISCV64MOVDload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
// match: (MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVXD x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVDstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVXD)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64MOVDnop(v *Value) bool {
@ -5658,6 +6133,7 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
config := b.Func.Config
typ := &b.Func.Config.Types
// match: (MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem)
// cond: is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_dynlink)
// result: (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
@ -5701,6 +6177,27 @@ func rewriteValueRISCV64_OpRISCV64MOVWUload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
// match: (MOVWUload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (MOVWUreg (FMVXS x))
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64MOVWUreg)
v0 := b.NewValue0(v_1.Pos, OpRISCV64FMVXS, typ.Int32)
v0.AddArg(x)
v.AddArg(v0)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64MOVWUreg(v *Value) bool {
@ -5891,6 +6388,25 @@ func rewriteValueRISCV64_OpRISCV64MOVWload(v *Value) bool {
v.AddArg2(base, mem)
return true
}
// match: (MOVWload [off] {sym} ptr1 (FMOVWstore [off] {sym} ptr2 x _))
// cond: isSamePtr(ptr1, ptr2)
// result: (FMVXS x)
for {
off := auxIntToInt32(v.AuxInt)
sym := auxToSym(v.Aux)
ptr1 := v_0
if v_1.Op != OpRISCV64FMOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym {
break
}
x := v_1.Args[1]
ptr2 := v_1.Args[0]
if !(isSamePtr(ptr1, ptr2)) {
break
}
v.reset(OpRISCV64FMVXS)
v.AddArg(x)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64MOVWreg(v *Value) bool {


@ -184,42 +184,6 @@ func InitConfig() {
BoundsCheckFunc[ssa.BoundsSlice3C] = typecheck.LookupRuntimeFunc("goPanicSlice3C")
BoundsCheckFunc[ssa.BoundsSlice3CU] = typecheck.LookupRuntimeFunc("goPanicSlice3CU")
BoundsCheckFunc[ssa.BoundsConvert] = typecheck.LookupRuntimeFunc("goPanicSliceConvert")
} else {
BoundsCheckFunc[ssa.BoundsIndex] = typecheck.LookupRuntimeFunc("panicIndex")
BoundsCheckFunc[ssa.BoundsIndexU] = typecheck.LookupRuntimeFunc("panicIndexU")
BoundsCheckFunc[ssa.BoundsSliceAlen] = typecheck.LookupRuntimeFunc("panicSliceAlen")
BoundsCheckFunc[ssa.BoundsSliceAlenU] = typecheck.LookupRuntimeFunc("panicSliceAlenU")
BoundsCheckFunc[ssa.BoundsSliceAcap] = typecheck.LookupRuntimeFunc("panicSliceAcap")
BoundsCheckFunc[ssa.BoundsSliceAcapU] = typecheck.LookupRuntimeFunc("panicSliceAcapU")
BoundsCheckFunc[ssa.BoundsSliceB] = typecheck.LookupRuntimeFunc("panicSliceB")
BoundsCheckFunc[ssa.BoundsSliceBU] = typecheck.LookupRuntimeFunc("panicSliceBU")
BoundsCheckFunc[ssa.BoundsSlice3Alen] = typecheck.LookupRuntimeFunc("panicSlice3Alen")
BoundsCheckFunc[ssa.BoundsSlice3AlenU] = typecheck.LookupRuntimeFunc("panicSlice3AlenU")
BoundsCheckFunc[ssa.BoundsSlice3Acap] = typecheck.LookupRuntimeFunc("panicSlice3Acap")
BoundsCheckFunc[ssa.BoundsSlice3AcapU] = typecheck.LookupRuntimeFunc("panicSlice3AcapU")
BoundsCheckFunc[ssa.BoundsSlice3B] = typecheck.LookupRuntimeFunc("panicSlice3B")
BoundsCheckFunc[ssa.BoundsSlice3BU] = typecheck.LookupRuntimeFunc("panicSlice3BU")
BoundsCheckFunc[ssa.BoundsSlice3C] = typecheck.LookupRuntimeFunc("panicSlice3C")
BoundsCheckFunc[ssa.BoundsSlice3CU] = typecheck.LookupRuntimeFunc("panicSlice3CU")
BoundsCheckFunc[ssa.BoundsConvert] = typecheck.LookupRuntimeFunc("panicSliceConvert")
}
if Arch.LinkArch.PtrSize == 4 {
ExtendCheckFunc[ssa.BoundsIndex] = typecheck.LookupRuntimeVar("panicExtendIndex")
ExtendCheckFunc[ssa.BoundsIndexU] = typecheck.LookupRuntimeVar("panicExtendIndexU")
ExtendCheckFunc[ssa.BoundsSliceAlen] = typecheck.LookupRuntimeVar("panicExtendSliceAlen")
ExtendCheckFunc[ssa.BoundsSliceAlenU] = typecheck.LookupRuntimeVar("panicExtendSliceAlenU")
ExtendCheckFunc[ssa.BoundsSliceAcap] = typecheck.LookupRuntimeVar("panicExtendSliceAcap")
ExtendCheckFunc[ssa.BoundsSliceAcapU] = typecheck.LookupRuntimeVar("panicExtendSliceAcapU")
ExtendCheckFunc[ssa.BoundsSliceB] = typecheck.LookupRuntimeVar("panicExtendSliceB")
ExtendCheckFunc[ssa.BoundsSliceBU] = typecheck.LookupRuntimeVar("panicExtendSliceBU")
ExtendCheckFunc[ssa.BoundsSlice3Alen] = typecheck.LookupRuntimeVar("panicExtendSlice3Alen")
ExtendCheckFunc[ssa.BoundsSlice3AlenU] = typecheck.LookupRuntimeVar("panicExtendSlice3AlenU")
ExtendCheckFunc[ssa.BoundsSlice3Acap] = typecheck.LookupRuntimeVar("panicExtendSlice3Acap")
ExtendCheckFunc[ssa.BoundsSlice3AcapU] = typecheck.LookupRuntimeVar("panicExtendSlice3AcapU")
ExtendCheckFunc[ssa.BoundsSlice3B] = typecheck.LookupRuntimeVar("panicExtendSlice3B")
ExtendCheckFunc[ssa.BoundsSlice3BU] = typecheck.LookupRuntimeVar("panicExtendSlice3BU")
ExtendCheckFunc[ssa.BoundsSlice3C] = typecheck.LookupRuntimeVar("panicExtendSlice3C")
ExtendCheckFunc[ssa.BoundsSlice3CU] = typecheck.LookupRuntimeVar("panicExtendSlice3CU")
}
// Wasm (all asm funcs with special ABIs)
@ -1366,9 +1330,6 @@ func (s *state) constInt(t *types.Type, c int64) *ssa.Value {
}
return s.constInt32(t, int32(c))
}
func (s *state) constOffPtrSP(t *types.Type, c int64) *ssa.Value {
return s.f.ConstOffPtrSP(t, c, s.sp)
}
// newValueOrSfCall* are wrappers around newValue*, which may create a call to a
// soft-float runtime function instead (when emitting soft-float code).
@ -5428,26 +5389,6 @@ func (s *state) putArg(n ir.Node, t *types.Type) *ssa.Value {
return a
}
func (s *state) storeArgWithBase(n ir.Node, t *types.Type, base *ssa.Value, off int64) {
pt := types.NewPtr(t)
var addr *ssa.Value
if base == s.sp {
// Use special routine that avoids allocation on duplicate offsets.
addr = s.constOffPtrSP(pt, off)
} else {
addr = s.newValue1I(ssa.OpOffPtr, pt, off, base)
}
if !ssa.CanSSA(t) {
a := s.addr(n)
s.move(t, addr, a)
return
}
a := s.expr(n)
s.storeType(t, addr, a, 0, false)
}
// slice computes the slice v[i:j:k] and returns ptr, len, and cap of result.
// i,j,k may be nil, in which case they are set to their default value.
// v may be a slice, string or pointer to an array.
@ -7772,7 +7713,5 @@ func isStructNotSIMD(t *types.Type) bool {
return t.IsStruct() && !t.IsSIMD()
}
var BoundsCheckFunc [ssa.BoundsKindCount]*obj.LSym


@ -622,12 +622,6 @@ func (s *Schedule) staticAssignInlinedCall(l *ir.Name, loff int64, call *ir.Inli
// INLCALL-ReturnVars
// . NAME-p.~R0 Class:PAUTO Offset:0 OnStack Used PTR-*T tc(1) # x.go:18:13
//
// In non-unified IR, the tree is slightly different:
// - if there are no arguments to the inlined function,
// the INLCALL-init omits the AS2.
// - the DCL inside BLOCK is on the AS2's init list,
// not its own statement in the top level of the BLOCK.
//
// If the init values are side-effect-free and each either only
// appears once in the function body or is safely repeatable,
// then we inline the value expressions into the return argument
@ -647,39 +641,26 @@ func (s *Schedule) staticAssignInlinedCall(l *ir.Name, loff int64, call *ir.Inli
// is the most important case for us to get right.
init := call.Init()
if len(init) != 2 || init[0].Op() != ir.OAS2 || init[1].Op() != ir.OINLMARK {
return false
}
as2init := init[0].(*ir.AssignListStmt)
if len(call.Body) != 2 || call.Body[0].Op() != ir.OBLOCK || call.Body[1].Op() != ir.OLABEL {
return false
}
label := call.Body[1].(*ir.LabelStmt).Label
block := call.Body[0].(*ir.BlockStmt)
list := block.List
if len(list) != 3 ||
list[0].Op() != ir.ODCL ||
list[1].Op() != ir.OAS2 ||
list[2].Op() != ir.OGOTO ||
list[2].(*ir.BranchStmt).Label != label {
return false
}
dcl := list[0].(*ir.Decl)
as2body := list[1].(*ir.AssignListStmt)
if len(as2body.Lhs) != 1 || as2body.Lhs[0] != dcl.X {
return false
}


@ -138,10 +138,6 @@ func impliesSemi(tok token) bool {
// TODO(gri) provide table of []byte values for all tokens to avoid repeated string conversion
func lineComment(text string) bool {
return strings.HasPrefix(text, "//")
}
func (p *printer) addWhitespace(kind ctrlSymbol, text string) {
p.pending = append(p.pending, whitespace{p.lastTok, kind /*text*/})
switch kind {


@ -122,3 +122,26 @@ func BenchmarkBitToggleConst(b *testing.B) {
}
}
}
func BenchmarkMulNeg(b *testing.B) {
x := make([]int64, 1024)
for i := 0; i < b.N; i++ {
var s int64
for i := range x {
s = (-x[i]) * 11
}
globl = s
}
}
func BenchmarkMul2Neg(b *testing.B) {
x := make([]int64, 1024)
y := make([]int64, 1024)
for i := 0; i < b.N; i++ {
var s int64
for i := range x {
s = (-x[i]) * (-y[i])
}
globl = s
}
}


@ -523,6 +523,106 @@ func TestFloatSignalingNaNConversionConst(t *testing.T) {
}
}
//go:noinline
func isPosInf(x float64) bool {
return math.IsInf(x, 1)
}
//go:noinline
func isPosInfEq(x float64) bool {
return x == math.Inf(1)
}
//go:noinline
func isPosInfCmp(x float64) bool {
return x > math.MaxFloat64
}
//go:noinline
func isNotPosInf(x float64) bool {
return !math.IsInf(x, 1)
}
//go:noinline
func isNotPosInfEq(x float64) bool {
return x != math.Inf(1)
}
//go:noinline
func isNotPosInfCmp(x float64) bool {
return x <= math.MaxFloat64
}
//go:noinline
func isNegInf(x float64) bool {
return math.IsInf(x, -1)
}
//go:noinline
func isNegInfEq(x float64) bool {
return x == math.Inf(-1)
}
//go:noinline
func isNegInfCmp(x float64) bool {
return x < -math.MaxFloat64
}
//go:noinline
func isNotNegInf(x float64) bool {
return !math.IsInf(x, -1)
}
//go:noinline
func isNotNegInfEq(x float64) bool {
return x != math.Inf(-1)
}
//go:noinline
func isNotNegInfCmp(x float64) bool {
return x >= -math.MaxFloat64
}
func TestInf(t *testing.T) {
tests := []struct {
value float64
isPosInf bool
isNegInf bool
isNaN bool
}{
{value: math.Inf(1), isPosInf: true},
{value: math.MaxFloat64},
{value: math.Inf(-1), isNegInf: true},
{value: -math.MaxFloat64},
{value: math.NaN(), isNaN: true},
}
check := func(name string, f func(x float64) bool, value float64, want bool) {
got := f(value)
if got != want {
t.Errorf("%v(%g): want %v, got %v", name, value, want, got)
}
}
for _, test := range tests {
check("isPosInf", isPosInf, test.value, test.isPosInf)
check("isPosInfEq", isPosInfEq, test.value, test.isPosInf)
check("isPosInfCmp", isPosInfCmp, test.value, test.isPosInf)
check("isNotPosInf", isNotPosInf, test.value, !test.isPosInf)
check("isNotPosInfEq", isNotPosInfEq, test.value, !test.isPosInf)
check("isNotPosInfCmp", isNotPosInfCmp, test.value, !test.isPosInf && !test.isNaN)
check("isNegInf", isNegInf, test.value, test.isNegInf)
check("isNegInfEq", isNegInfEq, test.value, test.isNegInf)
check("isNegInfCmp", isNegInfCmp, test.value, test.isNegInf)
check("isNotNegInf", isNotNegInf, test.value, !test.isNegInf)
check("isNotNegInfEq", isNotNegInfEq, test.value, !test.isNegInf)
check("isNotNegInfCmp", isNotNegInfCmp, test.value, !test.isNegInf && !test.isNaN)
}
}
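// Illustrative sketch, not part of the merge: the riscv64 rewrites turn
// the infinity tests above into FCLASSD bit checks. In the RISC-V spec,
// FCLASS.D sets bit 0 for -Inf and bit 7 for +Inf, which is why the rules
// emit ANDI [1] for x == math.Inf(-1) and ANDI [1<<7] (plus SNEZ) for
// x == math.Inf(1). A hypothetical model of just the two bits the rules
// test, relying on this file's existing math import:
func fclassInfBits(x float64) uint64 {
	var bits uint64
	if math.IsInf(x, -1) {
		bits |= 1 << 0 // FCLASS bit 0: negative infinity
	}
	if math.IsInf(x, 1) {
		bits |= 1 << 7 // FCLASS bit 7: positive infinity
	}
	return bits
}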
var sinkFloat float64
func BenchmarkMul2(b *testing.B) {


@ -235,27 +235,7 @@
package typecheck
import (
"strings"
)
const blankMarker = "$"
// TparamName returns the real name of a type parameter, after stripping its
// qualifying prefix and reverting blank-name encoding. See TparamExportName
// for details.
func TparamName(exportName string) string {
// Remove the "path" from the type param name that makes it unique.
ix := strings.LastIndex(exportName, ".")
if ix < 0 {
return ""
}
name := exportName[ix+1:]
if strings.HasPrefix(name, blankMarker) {
return "_"
}
return name
}
// The name used for dictionary parameters or local variables.
const LocalDictName = ".dict"


@ -19,9 +19,6 @@ func RangeExprType(t *types.Type) *types.Type {
return t
}
func typecheckrangeExpr(n *ir.RangeStmt) {
}
// type check assignment.
// if this assignment is the definition of a var on the left side,
// fill in the var's type.


@ -1712,13 +1712,6 @@ func fieldsHasShape(fields []*Field) bool {
return false
}
// newBasic returns a new basic type of the given kind.
func newBasic(kind Kind, obj Object) *Type {
t := newType(kind)
t.obj = obj
return t
}
// NewInterface returns a new interface for the given methods and
// embedded types. Embedded types are specified as fields with no Sym.
func NewInterface(methods []*Field) *Type {


@ -187,10 +187,6 @@ type Config struct {
EnableAlias bool
}
func srcimporter_setUsesCgo(conf *Config) {
conf.go115UsesCgo = true
}
// Info holds result type information for a type-checked package.
// Only the information for which a map is provided is collected.
// If the package has type errors, the collected information may


@ -22,7 +22,7 @@ var nopos syntax.Pos
const debug = false // leave on during development
// position tracing for panics during type checking
const tracePos = true
// _aliasAny changes the behavior of [Scope.Lookup] for "any" in the
// [Universe] scope.


@ -13,12 +13,6 @@ func AsPointer(t Type) *Pointer {
return u
}
// If t is a signature, AsSignature returns that type, otherwise it returns nil.
func AsSignature(t Type) *Signature {
u, _ := t.Underlying().(*Signature)
return u
}
// If typ is a type parameter, CoreType returns the single underlying
// type of all types in the corresponding type constraint if it exists, or
// nil otherwise. If the type set contains only unrestricted and restricted


@ -8,7 +8,6 @@ import (
"cmd/compile/internal/syntax"
"fmt"
"go/constant"
"internal/buildcfg"
. "internal/types/errors"
"slices"
)
@ -525,10 +524,6 @@ func (check *Checker) typeDecl(obj *TypeName, tdecl *syntax.TypeDecl, def *TypeN
// handle type parameters even if not allowed (Alias type is supported)
if tparam0 != nil {
check.openScope(tdecl, "type parameters")
defer check.closeScope()
check.collectTypeParams(&alias.tparams, tdecl.TParamList)


@ -11,7 +11,6 @@ import (
"cmd/compile/internal/syntax"
"errors"
"fmt"
"internal/buildcfg"
. "internal/types/errors"
)
@ -130,10 +129,6 @@ func (check *Checker) instance(pos syntax.Pos, orig genericType, targs []Type, e
res = check.newNamedInstance(pos, orig, targs, expanding) // substituted lazily
case *Alias:
// verify type parameter count (see go.dev/issue/71198 for a test case)
tparams := orig.TypeParams()
if !check.validateTArgLen(pos, orig.obj.Name(), tparams.Len(), len(targs)) {


@ -99,8 +99,7 @@ var testObjects = []struct {
{"type t = struct{f int}", "t", "type p.t = struct{f int}", false},
{"type t = func(int)", "t", "type p.t = func(int)", false},
{"type A = B; type B = int", "A", "type p.A = p.B", true},
{"type A[P ~int] = struct{}", "A", "type p.A[P ~int] = struct{}", true}, // requires GOEXPERIMENT=aliastypeparams
{"type A[P ~int] = struct{}", "A", "type p.A[P ~int] = struct{}", true},
{"var v int", "v", "var p.v int", false},
{"func f(int) string", "f", "func p.f(int) string", false},
@ -114,10 +113,6 @@ func TestObjectString(t *testing.T) {
for i, test := range testObjects {
t.Run(fmt.Sprint(i), func(t *testing.T) {
src := "package p; " + test.src
conf := Config{Error: func(error) {}, Importer: defaultImporter(), EnableAlias: test.alias}
pkg, err := typecheck(src, &conf, nil)


@ -332,6 +332,8 @@ func TestStdFixed(t *testing.T) {
"issue49814.go", // go/types does not have constraints on array size
"issue56103.go", // anonymous interface cycles; will be a type checker error in 1.22
"issue52697.go", // types2 does not have constraints on stack size
"issue68054.go", // this test requires GODEBUG=gotypesalias=1
"issue68580.go", // this test requires GODEBUG=gotypesalias=1
"issue73309.go", // this test requires GODEBUG=gotypesalias=1
"issue73309b.go", // this test requires GODEBUG=gotypesalias=1


@ -131,6 +131,14 @@ func walkExpr1(n ir.Node, init *ir.Nodes) ir.Node {
n := n.(*ir.BinaryExpr)
n.X = walkExpr(n.X, init)
n.Y = walkExpr(n.Y, init)
if n.Op() == ir.OUNSAFEADD && ir.ShouldCheckPtr(ir.CurFunc, 1) {
// For unsafe.Add(p, n), just walk "unsafe.Pointer(uintptr(p)+uintptr(n))"
// for the side effects of validating unsafe.Pointer rules.
x := typecheck.ConvNop(n.X, types.Types[types.TUINTPTR])
y := typecheck.Conv(n.Y, types.Types[types.TUINTPTR])
conv := typecheck.ConvNop(ir.NewBinaryExpr(n.Pos(), ir.OADD, x, y), types.Types[types.TUNSAFEPTR])
walkExpr(conv, init)
}
return n
case ir.OUNSAFESLICE:

src/cmd/dist/README

@ -4,18 +4,17 @@ As of Go 1.5, dist and other parts of the compiler toolchain are written
in Go, making bootstrapping a little more involved than in the past.
The approach is to build the current release of Go with an earlier one.
The process to install Go 1.x, for x ≥ 26, is:
1. Build cmd/dist with Go 1.24.6.
2. Using dist, build Go 1.x compiler toolchain with Go 1.24.6.
3. Using dist, rebuild Go 1.x compiler toolchain with itself.
4. Using dist, build Go 1.x cmd/go (as go_bootstrap) with Go 1.x compiler toolchain.
5. Using go_bootstrap, build the remaining Go 1.x standard library and commands.
Because of backward compatibility, although the steps above say Go 1.24.6,
in practice any release ≥ Go 1.24.6 but < Go 1.x will work as the bootstrap base.
Releases ≥ Go 1.x are very likely to work as well.
See go.dev/s/go15bootstrap for more details about the original bootstrap
and go.dev/issue/54265 for details about later bootstrap version bumps.


@ -1819,7 +1819,6 @@ var cgoEnabled = map[string]bool{
"solaris/amd64": true,
"windows/386": true,
"windows/amd64": true,
"windows/arm": false,
"windows/arm64": true,
}
@ -1828,9 +1827,9 @@ var cgoEnabled = map[string]bool{
// get filtered out of cgoEnabled for 'dist list'.
// See go.dev/issue/56679.
var broken = map[string]bool{
"linux/sparc64": true, // An incomplete port. See CL 132155.
"openbsd/mips64": true, // Broken: go.dev/issue/58110.
"windows/arm": true, // Broken: go.dev/issue/68552.
"freebsd/riscv64": true, // Broken: go.dev/issue/73568.
"linux/sparc64": true, // An incomplete port. See CL 132155.
"openbsd/mips64": true, // Broken: go.dev/issue/58110.
}
// List of platforms which are first class ports. See go.dev/issue/38874.


@ -121,7 +121,7 @@ var ignoreSuffixes = []string{
"~",
}
const minBootstrap = "go1.22.6"
const minBootstrap = "go1.24.6"
var tryDirs = []string{
"sdk/" + minBootstrap,


@ -205,18 +205,6 @@ func (r *importReader) readImport(imports *[]string) {
r.readString(imports)
}
// readComments is like ioutil.ReadAll, except that it only reads the leading
// block of comments in the file.
func readComments(f io.Reader) ([]byte, error) {
r := &importReader{b: bufio.NewReader(f)}
r.peekByte(true)
if r.err == nil && !r.eof {
// Didn't reach EOF, so must have found a non-space byte. Remove it.
r.buf = r.buf[:len(r.buf)-1]
}
return r.buf, r.err
}
// readimports returns the imports found in the named file.
func readimports(file string) []string {
var imports []string


@ -2,20 +2,20 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Go 1.26 and later requires Go 1.24.6 as the minimum bootstrap toolchain.
// If cmd/dist is built using an earlier Go version, this file will be
// included in the build and cause an error like:
//
// % GOROOT_BOOTSTRAP=$HOME/sdk/go1.16 ./make.bash
// Building Go cmd/dist using /Users/rsc/sdk/go1.16. (go1.16 darwin/amd64)
// found packages main (build.go) and building_Go_requires_Go_1_24_6_or_later (notgo124.go) in /Users/rsc/go/src/cmd/dist
// %
//
// which is the best we can do under the circumstances.
//
// See go.dev/issue/44505 and go.dev/issue/54265 for more
// background on why Go moved on from Go 1.4 for bootstrap.
//go:build !go1.24
package building_Go_requires_Go_1_24_6_or_later


@ -33,7 +33,6 @@ type systeminfo struct {
const (
PROCESSOR_ARCHITECTURE_AMD64 = 9
PROCESSOR_ARCHITECTURE_INTEL = 0
PROCESSOR_ARCHITECTURE_ARM64 = 12
PROCESSOR_ARCHITECTURE_IA64 = 6
)
@ -47,8 +46,6 @@ func sysinit() {
gohostarch = "amd64"
case PROCESSOR_ARCHITECTURE_INTEL:
gohostarch = "386"
case PROCESSOR_ARCHITECTURE_ARM64:
gohostarch = "arm64"
default:

src/cmd/dist/util.go

@ -362,16 +362,6 @@ func errprintf(format string, args ...interface{}) {
fmt.Fprintf(os.Stderr, format, args...)
}
// xsamefile reports whether f1 and f2 are the same file (or dir).
func xsamefile(f1, f2 string) bool {
fi1, err1 := os.Stat(f1)
fi2, err2 := os.Stat(f2)
if err1 != nil || err2 != nil {
return f1 == f2
}
return os.SameFile(fi1, fi2)
}
func xgetgoarm() string {
// If we're building on an actual arm system, and not building
// a cross-compiling toolchain, try to exec ourselves


@ -1,21 +1,21 @@
module cmd
go 1.26
require (
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5
golang.org/x/arch v0.20.1-0.20250808194827-46ba08e3ae58
golang.org/x/build v0.0.0-20250806225920-b7c66c047964
golang.org/x/mod v0.27.0
golang.org/x/sync v0.16.0
golang.org/x/sys v0.35.0
golang.org/x/telemetry v0.0.0-20250807160809-1a19826ec488
golang.org/x/term v0.34.0
golang.org/x/tools v0.36.1-0.20250808220315-8866876b956f
)
require (
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd // indirect
golang.org/x/text v0.26.0 // indirect
github.com/ianlancetaylor/demangle v0.0.0-20250417193237-f615e6bd150b // indirect
golang.org/x/text v0.28.0 // indirect
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef // indirect
)

@ -1,28 +1,28 @@
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/google/pprof v0.0.0-20250208200701-d0013a598941 h1:43XjGa6toxLpeksjcxs1jIoIyr+vUfOqY2c6HB4bpoc=
github.com/google/pprof v0.0.0-20250208200701-d0013a598941/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd h1:EVX1s+XNss9jkRW9K6XGJn2jL2lB1h5H804oKPsxOec=
github.com/ianlancetaylor/demangle v0.0.0-20240912202439-0a2b6291aafd/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5 h1:xhMrHhTJ6zxu3gA4enFM9MLn9AY7613teCdFnlUVbSQ=
github.com/google/pprof v0.0.0-20250630185457-6e76a2b096b5/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA=
github.com/ianlancetaylor/demangle v0.0.0-20250417193237-f615e6bd150b h1:ogbOPx86mIhFy764gGkqnkFC8m5PJA7sPzlk9ppLVQA=
github.com/ianlancetaylor/demangle v0.0.0-20250417193237-f615e6bd150b/go.mod h1:gx7rwoVhcfuVKG5uya9Hs3Sxj7EIvldVofAWIUtGouw=
github.com/yuin/goldmark v1.6.0 h1:boZcn2GTjpsynOsC0iJHnBWa4Bi0qzfJjthwauItG68=
github.com/yuin/goldmark v1.6.0/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
golang.org/x/arch v0.18.1-0.20250605182141-b2f4e2807dec h1:fCOjXc18tBlkVy4m+VuL1WU8VTukYOGtAk7nC5QYPRY=
golang.org/x/arch v0.18.1-0.20250605182141-b2f4e2807dec/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
golang.org/x/build v0.0.0-20250606033421-8c8ff6f34a83 h1:IiFSc399rOkpudtnsTDKdtfFEsvd+dGfNfl+ytV267c=
golang.org/x/build v0.0.0-20250606033421-8c8ff6f34a83/go.mod h1:SDzKvZFXqZyl3tLink1AnKsAocWm0yFc3UfmxR6aIOw=
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8=
golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw=
golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/telemetry v0.0.0-20250606142133-60998feb31a8 h1:jBJ3nsFeGb1DzjhOg2ZgZTpnDnOZfHId7RNlBJUtkOM=
golang.org/x/telemetry v0.0.0-20250606142133-60998feb31a8/go.mod h1:mUcjA5g0luJpMYCLjhH91f4t4RAUNp+zq9ZmUoqPD7M=
golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg=
golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ=
golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M=
golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA=
golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo=
golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg=
golang.org/x/arch v0.20.1-0.20250808194827-46ba08e3ae58 h1:uxPa6+/WsUfzikIAPMqpTho10y4qtYpINBurU+6NrHE=
golang.org/x/arch v0.20.1-0.20250808194827-46ba08e3ae58/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
golang.org/x/build v0.0.0-20250806225920-b7c66c047964 h1:yRs1K51GKq7hsIO+YHJ8LsslrvwFceNPIv0tYjpcBd0=
golang.org/x/build v0.0.0-20250806225920-b7c66c047964/go.mod h1:i9Vx7+aOQUpYJRxSO+OpRStVBCVL/9ccI51xblWm5WY=
golang.org/x/mod v0.27.0 h1:kb+q2PyFnEADO2IEF935ehFUXlWiNjJWtRNgBLSfbxQ=
golang.org/x/mod v0.27.0/go.mod h1:rWI627Fq0DEoudcK+MBkNkCe0EetEaDSwJJkCcjpazc=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/telemetry v0.0.0-20250807160809-1a19826ec488 h1:3doPGa+Gg4snce233aCWnbZVFsyFMo/dR40KK/6skyE=
golang.org/x/telemetry v0.0.0-20250807160809-1a19826ec488/go.mod h1:fGb/2+tgXXjhjHsTNdVEEMZNWA0quBnfrO+AfoDSAKw=
golang.org/x/term v0.34.0 h1:O/2T7POpk0ZZ7MAzMeWFSg6S5IpWd/RXDlM9hgM3DR4=
golang.org/x/term v0.34.0/go.mod h1:5jC53AEywhIVebHgPVeg0mj8OD3VO9OzclacVrqpaAw=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.36.1-0.20250808220315-8866876b956f h1:9m2Iptt9ZZU5llKDJy1XUl5d13PN1ZYV16KwOvE6jOw=
golang.org/x/tools v0.36.1-0.20250808220315-8866876b956f/go.mod h1:WBDiHKJK8YgLHlcQPYQzNCkUxUypCaa5ZegCVutKm+s=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef h1:mqLYrXCXYEZOop9/Dbo6RPX11539nwiCNBb1icVPmw8=
rsc.io/markdown v0.0.0-20240306144322-0bf8f97ee8ef/go.mod h1:8xcPgWmwlZONN1D9bjxtHEjrUtSEa3fakVF8iaewYKQ=

@ -2527,7 +2527,7 @@
// The default is GOFIPS140=off, which makes no FIPS-140 changes at all.
// Other values enable FIPS-140 compliance measures and select alternate
// versions of the cryptography source code.
// See https://go.dev/security/fips140 for details.
// See https://go.dev/doc/security/fips140 for details.
// GO_EXTLINK_ENABLED
// Whether the linker should use external linking mode
// when using -linkmode=auto with code that uses cgo.

@ -52,16 +52,6 @@ func maybeToolchainVersion(name string) string {
return FromToolchain(name)
}
// ToolchainMax returns the maximum of x and y interpreted as toolchain names,
// compared using Compare(FromToolchain(x), FromToolchain(y)).
// If x and y compare equal, Max returns x.
func ToolchainMax(x, y string) string {
if Compare(FromToolchain(x), FromToolchain(y)) < 0 {
return y
}
return x
}
// Startup records the information that went into the startup-time version switch.
// It is initialized by switchGoToolchain.
var Startup struct {

@ -695,7 +695,7 @@ Special-purpose environment variables:
The default is GOFIPS140=off, which makes no FIPS-140 changes at all.
Other values enable FIPS-140 compliance measures and select alternate
versions of the cryptography source code.
See https://go.dev/security/fips140 for details.
See https://go.dev/doc/security/fips140 for details.
GO_EXTLINK_ENABLED
Whether the linker should use external linking mode
when using -linkmode=auto with code that uses cgo.

@ -8,7 +8,6 @@
package filelock
import (
"errors"
"io/fs"
)
@ -74,10 +73,3 @@ func (lt lockType) String() string {
return "Unlock"
}
}
// IsNotSupported returns a boolean indicating whether the error is known to
// report that a function is not supported (possibly for a specific input).
// It is satisfied by errors.ErrUnsupported as well as some syscall errors.
func IsNotSupported(err error) bool {
return errors.Is(err, errors.ErrUnsupported)
}

@ -387,23 +387,6 @@ func (r *gitRepo) Latest(ctx context.Context) (*RevInfo, error) {
return info, nil
}
// findRef finds some ref name for the given hash,
// for use when the server requires giving a ref instead of a hash.
// There may be multiple ref names for a given hash,
// in which case this returns some name - it doesn't matter which.
func (r *gitRepo) findRef(ctx context.Context, hash string) (ref string, ok bool) {
refs, err := r.loadRefs(ctx)
if err != nil {
return "", false
}
for ref, h := range refs {
if h == hash {
return ref, true
}
}
return "", false
}
func (r *gitRepo) checkConfigSHA256(ctx context.Context) bool {
if hashType, sha256CfgErr := r.runGit(ctx, "git", "config", "extensions.objectformat"); sha256CfgErr == nil {
return "sha256" == strings.TrimSpace(string(hashType))

@ -1009,10 +1009,6 @@ func LegacyGoMod(modPath string) []byte {
return fmt.Appendf(nil, "module %s\n", modfile.AutoQuote(modPath))
}
func (r *codeRepo) modPrefix(rev string) string {
return r.modPath + "@" + rev
}
func (r *codeRepo) retractedVersions(ctx context.Context) (func(string) bool, error) {
vs, err := r.Versions(ctx, "")
if err != nil {

@ -453,7 +453,7 @@ func updateTools(ctx context.Context, queries []*query, opts *modload.WriteOpts)
if queries[i].version == "none" {
opts.DropTools = append(opts.DropTools, m.Pkgs...)
} else {
opts.AddTools = append(opts.DropTools, m.Pkgs...)
opts.AddTools = append(opts.AddTools, m.Pkgs...)
}
}
}
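
The one-line change above fixes a copy-paste slip: new tool packages were being appended onto opts.DropTools instead of opts.AddTools, so the add list silently inherited whatever was queued for dropping. A minimal sketch of that bug class, with hypothetical names (not code from this diff):

package main

import "fmt"

func main() {
	drop := []string{"cmd/old"}
	var add []string

	// Buggy pattern: appending onto the wrong base slice makes the
	// "add" list inherit the entries queued for dropping.
	add = append(drop, "cmd/new")
	fmt.Println(add) // [cmd/old cmd/new]

	// Fixed pattern: grow the slice that was actually intended.
	add = nil
	add = append(add, "cmd/new")
	fmt.Println(add) // [cmd/new]
}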


@ -10,7 +10,6 @@ package modindex
import (
"bytes"
"cmd/go/internal/fsys"
"cmd/go/internal/str"
"errors"
"fmt"
"go/ast"
@ -118,96 +117,12 @@ func (ctxt *Context) joinPath(elem ...string) string {
return filepath.Join(elem...)
}
// splitPathList calls ctxt.SplitPathList (if not nil) or else filepath.SplitList.
func (ctxt *Context) splitPathList(s string) []string {
if f := ctxt.SplitPathList; f != nil {
return f(s)
}
return filepath.SplitList(s)
}
// isAbsPath calls ctxt.IsAbsPath (if not nil) or else filepath.IsAbs.
func (ctxt *Context) isAbsPath(path string) bool {
if f := ctxt.IsAbsPath; f != nil {
return f(path)
}
return filepath.IsAbs(path)
}
// isDir reports whether path is a directory.
func isDir(path string) bool {
fi, err := fsys.Stat(path)
return err == nil && fi.IsDir()
}
// hasSubdir calls ctxt.HasSubdir (if not nil) or else uses
// the local file system to answer the question.
func (ctxt *Context) hasSubdir(root, dir string) (rel string, ok bool) {
if f := ctxt.HasSubdir; f != nil {
return f(root, dir)
}
// Try using paths we received.
if rel, ok = hasSubdir(root, dir); ok {
return
}
// Try expanding symlinks and comparing
// expanded against unexpanded and
// expanded against expanded.
rootSym, _ := filepath.EvalSymlinks(root)
dirSym, _ := filepath.EvalSymlinks(dir)
if rel, ok = hasSubdir(rootSym, dir); ok {
return
}
if rel, ok = hasSubdir(root, dirSym); ok {
return
}
return hasSubdir(rootSym, dirSym)
}
// hasSubdir reports if dir is within root by performing lexical analysis only.
func hasSubdir(root, dir string) (rel string, ok bool) {
root = str.WithFilePathSeparator(filepath.Clean(root))
dir = filepath.Clean(dir)
if !strings.HasPrefix(dir, root) {
return "", false
}
return filepath.ToSlash(dir[len(root):]), true
}
// gopath returns the list of Go path directories.
func (ctxt *Context) gopath() []string {
var all []string
for _, p := range ctxt.splitPathList(ctxt.GOPATH) {
if p == "" || p == ctxt.GOROOT {
// Empty paths are uninteresting.
// If the path is the GOROOT, ignore it.
// People sometimes set GOPATH=$GOROOT.
// Do not get confused by this common mistake.
continue
}
if strings.HasPrefix(p, "~") {
// Path segments starting with ~ on Unix are almost always
// users who have incorrectly quoted ~ while setting GOPATH,
// preventing it from expanding to $HOME.
// The situation is made more confusing by the fact that
// bash allows quoted ~ in $PATH (most shells do not).
// Do not get confused by this, and do not try to use the path.
// It does not exist, and printing errors about it confuses
// those users even more, because they think "sure ~ exists!".
// The go command diagnoses this situation and prints a
// useful error.
// On Windows, ~ is used in short names, such as c:\progra~1
// for c:\program files.
continue
}
all = append(all, p)
}
return all
}
var defaultToolTags, defaultReleaseTags []string
// NoGoError is the error used by Import to describe a directory
@ -266,114 +181,12 @@ func fileListForExt(p *build.Package, ext string) *[]string {
return nil
}
var errNoModules = errors.New("not using modules")
func findImportComment(data []byte) (s string, line int) {
// expect keyword package
word, data := parseWord(data)
if string(word) != "package" {
return "", 0
}
// expect package name
_, data = parseWord(data)
// now ready for import comment, a // or /* */ comment
// beginning and ending on the current line.
for len(data) > 0 && (data[0] == ' ' || data[0] == '\t' || data[0] == '\r') {
data = data[1:]
}
var comment []byte
switch {
case bytes.HasPrefix(data, slashSlash):
comment, _, _ = bytes.Cut(data[2:], newline)
case bytes.HasPrefix(data, slashStar):
var ok bool
comment, _, ok = bytes.Cut(data[2:], starSlash)
if !ok {
// malformed comment
return "", 0
}
if bytes.Contains(comment, newline) {
return "", 0
}
}
comment = bytes.TrimSpace(comment)
// split comment into `import`, `"pkg"`
word, arg := parseWord(comment)
if string(word) != "import" {
return "", 0
}
line = 1 + bytes.Count(data[:cap(data)-cap(arg)], newline)
return strings.TrimSpace(string(arg)), line
}
var (
slashSlash = []byte("//")
slashStar = []byte("/*")
starSlash = []byte("*/")
newline = []byte("\n")
)
// skipSpaceOrComment returns data with any leading spaces or comments removed.
func skipSpaceOrComment(data []byte) []byte {
for len(data) > 0 {
switch data[0] {
case ' ', '\t', '\r', '\n':
data = data[1:]
continue
case '/':
if bytes.HasPrefix(data, slashSlash) {
i := bytes.Index(data, newline)
if i < 0 {
return nil
}
data = data[i+1:]
continue
}
if bytes.HasPrefix(data, slashStar) {
data = data[2:]
i := bytes.Index(data, starSlash)
if i < 0 {
return nil
}
data = data[i+2:]
continue
}
}
break
}
return data
}
// parseWord skips any leading spaces or comments in data
// and then parses the beginning of data as an identifier or keyword,
// returning that word and what remains after the word.
func parseWord(data []byte) (word, rest []byte) {
data = skipSpaceOrComment(data)
// Parse past leading word characters.
rest = data
for {
r, size := utf8.DecodeRune(rest)
if unicode.IsLetter(r) || '0' <= r && r <= '9' || r == '_' {
rest = rest[size:]
continue
}
break
}
word = data[:len(data)-len(rest)]
if len(word) == 0 {
return nil, nil
}
return word, rest
}
var dummyPkg build.Package
// fileInfo records information learned about a file included in a build.

@ -1039,11 +1039,6 @@ func (r *reader) string() string {
return r.d.stringTableAt(r.int())
}
// bool reads the next bool.
func (r *reader) bool() bool {
return r.int() != 0
}
// tokpos reads the next token.Position.
func (r *reader) tokpos() token.Position {
return token.Position{

@ -658,11 +658,6 @@ func EditBuildList(ctx context.Context, add, mustSelect []module.Version) (chang
return changed, nil
}
// OverrideRoots edits the global requirement roots by replacing the specific module versions.
func OverrideRoots(ctx context.Context, replace []module.Version) {
requirements = overrideRoots(ctx, requirements, replace)
}
func overrideRoots(ctx context.Context, rs *Requirements, replace []module.Version) *Requirements {
drop := make(map[string]bool)
for _, m := range replace {

@ -305,30 +305,6 @@ func (mms *MainModuleSet) Godebugs() []*modfile.Godebug {
return nil
}
// Toolchain returns the toolchain set on the single module, in module mode,
// or the go.work file in workspace mode.
func (mms *MainModuleSet) Toolchain() string {
if inWorkspaceMode() {
if mms.workFile != nil && mms.workFile.Toolchain != nil {
return mms.workFile.Toolchain.Name
}
return "go" + mms.GoVersion()
}
if mms != nil && len(mms.versions) == 1 {
f := mms.ModFile(mms.mustGetSingleMainModule())
if f == nil {
// Special case: we are outside a module, like 'go run x.go'.
// Assume the local Go version.
// TODO(#49228): Clean this up; see loadModFile.
return gover.LocalToolchain()
}
if f.Toolchain != nil {
return f.Toolchain.Name
}
}
return "go" + mms.GoVersion()
}
func (mms *MainModuleSet) WorkFileReplaceMap() map[module.Version]module.Version {
return mms.workFileReplaceMap
}

@ -6,7 +6,6 @@ package work
import (
"bufio"
"bytes"
"fmt"
"internal/buildcfg"
"internal/platform"
@ -438,32 +437,6 @@ func (gcToolchain) symabis(b *Builder, a *Action, sfiles []string) (string, erro
return symabis, nil
}
// toolVerify checks that the command line args writes the same output file
// if run using newTool instead.
// Unused now but kept around for future use.
func toolVerify(a *Action, b *Builder, p *load.Package, newTool string, ofile string, args []any) error {
newArgs := make([]any, len(args))
copy(newArgs, args)
newArgs[1] = base.Tool(newTool)
newArgs[3] = ofile + ".new" // x.6 becomes x.6.new
if err := b.Shell(a).run(p.Dir, p.ImportPath, nil, newArgs...); err != nil {
return err
}
data1, err := os.ReadFile(ofile)
if err != nil {
return err
}
data2, err := os.ReadFile(ofile + ".new")
if err != nil {
return err
}
if !bytes.Equal(data1, data2) {
return fmt.Errorf("%s and %s produced different output files:\n%s\n%s", filepath.Base(args[1].(string)), newTool, strings.Join(str.StringList(args...), " "), strings.Join(str.StringList(newArgs...), " "))
}
os.Remove(ofile + ".new")
return nil
}
func (gcToolchain) pack(b *Builder, a *Action, afile string, ofiles []string) error {
absOfiles := make([]string, 0, len(ofiles))
for _, f := range ofiles {

@ -0,0 +1,25 @@
# Regression test for https://go.dev/issue/74035.
go get -tool example.com/foo/cmd/a example.com/foo/cmd/b
cmp go.mod go.mod.want
-- go.mod --
module example.com/foo
go 1.24
-- go.mod.want --
module example.com/foo
go 1.24
tool (
example.com/foo/cmd/a
example.com/foo/cmd/b
)
-- cmd/a/a.go --
package a
func main() {}
-- cmd/b/b.go --
package b
func main() {}

@ -498,20 +498,6 @@ func exactly16Bytes(s string) string {
// architecture-independent object file output
const HeaderSize = 60
func ReadHeader(b *bufio.Reader, name string) int {
var buf [HeaderSize]byte
if _, err := io.ReadFull(b, buf[:]); err != nil {
return -1
}
aname := strings.Trim(string(buf[0:16]), " ")
if !strings.HasPrefix(aname, name) {
return -1
}
asize := strings.Trim(string(buf[48:58]), " ")
i, _ := strconv.Atoi(asize)
return i
}
func FormatHeader(arhdr []byte, name string, size int64) {
copy(arhdr[:], fmt.Sprintf("%-16s%-12d%-6d%-6d%-8o%-10d`\n", name, 0, 0, 0, 0644, size))
}

@ -56,11 +56,6 @@ func (w *Writer) Debug(out io.Writer) {
w.debug = out
}
// BitIndex returns the number of bits written to the bit stream so far.
func (w *Writer) BitIndex() int64 {
return w.index
}
// byte writes the byte x to the output.
func (w *Writer) byte(x byte) {
if w.debug != nil {
@ -98,20 +93,6 @@ func (w *Writer) Ptr(index int64) {
w.lit(1)
}
// ShouldRepeat reports whether it would be worthwhile to
// use a Repeat to describe c elements of n bits each,
// compared to just emitting c copies of the n-bit description.
func (w *Writer) ShouldRepeat(n, c int64) bool {
// Should we lay out the bits directly instead of
// encoding them as a repetition? Certainly if count==1,
// since there's nothing to repeat, but also if the total
// size of the plain pointer bits for the type will fit in
// 4 or fewer bytes, since using a repetition will require
// flushing the current bits plus at least one byte for
// the repeat size and one for the repeat count.
return c > 1 && c*n > 4*8
}
// Repeat emits an instruction to repeat the description
// of the last n words c times (including the initial description, c+1 times in total).
func (w *Writer) Repeat(n, c int64) {
@ -163,36 +144,6 @@ func (w *Writer) ZeroUntil(index int64) {
w.Repeat(1, skip-1)
}
// Append emits the given GC program into the current output.
// The caller asserts that the program emits n bits (describes n words),
// and Append panics if that is not true.
func (w *Writer) Append(prog []byte, n int64) {
w.flushlit()
if w.debug != nil {
fmt.Fprintf(w.debug, "gcprog: append prog for %d ptrs\n", n)
fmt.Fprintf(w.debug, "\t")
}
n1 := progbits(prog)
if n1 != n {
panic("gcprog: wrong bit count in append")
}
// The last byte of the prog terminates the program.
// Don't emit that, or else our own program will end.
for i, x := range prog[:len(prog)-1] {
if w.debug != nil {
if i > 0 {
fmt.Fprintf(w.debug, " ")
}
fmt.Fprintf(w.debug, "%02x", x)
}
w.byte(x)
}
if w.debug != nil {
fmt.Fprintf(w.debug, "\n")
}
w.index += n
}
// progbits returns the length of the bit stream encoded by the program p.
func progbits(p []byte) int64 {
var n int64
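
An aside on the ShouldRepeat heuristic removed above: the test c > 1 && c*n > 4*8 simply compares the cost of emitting c*n literal bits against the roughly four bytes that flushing plus a repeat instruction costs. A minimal standalone sketch of that arithmetic (illustrative only, not part of the Go tree):

package main

import "fmt"

// shouldRepeat mirrors the deleted heuristic: a Repeat pays off only
// when there is more than one element and the literal bits would
// exceed the ~4 bytes the repeat encoding itself costs.
func shouldRepeat(n, c int64) bool {
	return c > 1 && c*n > 4*8
}

func main() {
	fmt.Println(shouldRepeat(1, 8))  // false: 8 one-bit entries fit in a byte
	fmt.Println(shouldRepeat(1, 64)) // true: 64 literal bits outweigh a repeat
}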


@ -635,29 +635,11 @@ func (r *Reader) uint64At(off uint32) uint64 {
return binary.LittleEndian.Uint64(b)
}
func (r *Reader) int64At(off uint32) int64 {
return int64(r.uint64At(off))
}
func (r *Reader) uint32At(off uint32) uint32 {
b := r.BytesAt(off, 4)
return binary.LittleEndian.Uint32(b)
}
func (r *Reader) int32At(off uint32) int32 {
return int32(r.uint32At(off))
}
func (r *Reader) uint16At(off uint32) uint16 {
b := r.BytesAt(off, 2)
return binary.LittleEndian.Uint16(b)
}
func (r *Reader) uint8At(off uint32) uint8 {
b := r.BytesAt(off, 1)
return b[0]
}
func (r *Reader) StringAt(off uint32, len uint32) string {
b := r.b[off : off+len]
if r.readonly {

@ -1054,15 +1054,6 @@ var sysInstFields = map[SpecialOperand]struct {
// Used for padding NOOP instruction
const OP_NOOP = 0xd503201f
// pcAlignPadLength returns the number of bytes required to align pc to alignedValue,
// reporting an error if alignedValue is not a power of two or is out of range.
func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int {
if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) {
ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue)
}
return int(-pc & (alignedValue - 1))
}
// size returns the size of the sequence of machine instructions when p is encoded with o.
// Usually it just returns o.size directly, in some cases it checks whether the optimization
// conditions are met, and if so returns the size of the optimized instruction sequence.
@ -1209,10 +1200,6 @@ type codeBuffer struct {
data *[]byte
}
func (cb *codeBuffer) pc() int64 {
return int64(len(*cb.data))
}
// Write a sequence of opcodes into the code buffer.
func (cb *codeBuffer) emit(op ...uint32) {
for _, o := range op {

@ -567,6 +567,11 @@ const (
AMOVVF
AMOVVD
// 2.2.1.3
AALSLW
AALSLWU
AALSLV
// 2.2.1.8
AORN
AANDN
@ -743,6 +748,9 @@ const (
AFTINTRNEVF
AFTINTRNEVD
// 3.2.4.2
AFSEL
// LSX and LASX memory access instructions
AVMOVQ
AXVMOVQ

@ -125,6 +125,9 @@ var Anames = []string{
"MOVDV",
"MOVVF",
"MOVVD",
"ALSLW",
"ALSLWU",
"ALSLV",
"ORN",
"ANDN",
"AMSWAPB",
@ -261,6 +264,7 @@ var Anames = []string{
"FTINTRNEWD",
"FTINTRNEVF",
"FTINTRNEVD",
"FSEL",
"VMOVQ",
"XVMOVQ",
"VADDB",

@ -154,6 +154,9 @@ var optab = []Optab{
{AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 37, 4, 0, 0},
{AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 37, 4, 0, 0},
{AFSEL, C_FCCREG, C_FREG, C_FREG, C_FREG, C_NONE, 33, 4, 0, 0},
{AFSEL, C_FCCREG, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0},
{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
{AMOVWU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
{AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
@ -422,6 +425,8 @@ var optab = []Optab{
{APRELD, C_SOREG, C_U5CON, C_NONE, C_NONE, C_NONE, 47, 4, 0, 0},
{APRELDX, C_SOREG, C_DCON, C_U5CON, C_NONE, C_NONE, 48, 20, 0, 0},
{AALSLV, C_U3CON, C_REG, C_REG, C_REG, C_NONE, 64, 4, 0, 0},
{obj.APCALIGN, C_U12CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
{obj.APCDATA, C_32CON, C_NONE, C_NONE, C_32CON, C_NONE, 0, 0, 0, 0},
{obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0},
@ -729,10 +734,6 @@ func isint32(v int64) bool {
return int64(int32(v)) == v
}
func isuint32(v uint64) bool {
return uint64(uint32(v)) == v
}
func (c *ctxt0) aclass(a *obj.Addr) int {
switch a.Type {
case obj.TYPE_NONE:
@ -1496,6 +1497,10 @@ func buildop(ctxt *obj.Link) {
case ABFPT:
opset(ABFPF, r0)
case AALSLV:
opset(AALSLW, r0)
opset(AALSLWU, r0)
case AMOVW,
AMOVD,
AMOVF,
@ -1515,6 +1520,7 @@ func buildop(ctxt *obj.Link) {
AWORD,
APRELD,
APRELDX,
AFSEL,
obj.ANOP,
obj.ATEXT,
obj.AFUNCDATA,
@ -1952,6 +1958,10 @@ func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
return op | (r2&0x1F)<<5 | (r3&0x1F)<<0
}
func OP_2IRRR(op uint32, i uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
return op | (i&0x3)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4&0x1F)<<0
}
func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F)
}
@ -2381,6 +2391,16 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
}
o1 = OP_6IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
case 33: // fsel ca, fk, [fj], fd
ca := uint32(p.From.Reg)
fk := uint32(p.Reg)
fd := uint32(p.To.Reg)
fj := fd
if len(p.RestArgs) > 0 {
fj = uint32(p.GetFrom3().Reg)
}
o1 = 0x340<<18 | (ca&0x7)<<15 | (fk&0x1F)<<10 | (fj&0x1F)<<5 | (fd & 0x1F)
case 34: // mov $con,fr
v := c.regoff(&p.From)
a := AADDU
@ -2721,6 +2741,14 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
case 62: // rdtimex rd, rj
o1 = OP_RR(c.oprr(p.As), uint32(p.To.Reg), uint32(p.RegTo2))
case 64: // alsl rd, rj, rk, sa2
sa := p.From.Offset - 1
if sa < 0 || sa > 3 {
c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p)
}
r := p.GetFrom3().Reg
o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg))
case 65: // mov sym@GOT, r ==> pcalau12i + ld.d
o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
c.cursym.AddRel(c.ctxt, obj.Reloc{
@ -4248,6 +4276,19 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
return 0
}
func (c *ctxt0) opirrr(a obj.As) uint32 {
switch a {
case AALSLW:
return 0x2 << 17 // alsl.w
case AALSLWU:
return 0x3 << 17 // alsl.wu
case AALSLV:
return 0x16 << 17 // alsl.d
}
return 0
}
func (c *ctxt0) opirir(a obj.As) uint32 {
switch a {
case ABSTRINSW:

@ -268,6 +268,27 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate)
bits[11:1]: block size, the value range is [16, 1024], and it must be an integer multiple of 16
bits[20:12]: block num, the value range is [1, 256]
bits[36:21]: stride, the value range is [0, 0xffff]
4. ShiftAdd instructions
Mapping between Go and platform assembly:
Go assembly | platform assembly
ALSLW/ALSLWU/ALSLV $Imm, Rj, Rk, Rd | alsl.w/wu/d rd, rj, rk, $imm
Instruction encoding format is as follows:
| 31 ~ 17 | 16 ~ 15 | 14 ~ 10 | 9 ~ 5 | 4 ~ 0 |
| opcode | sa2 | rk | rj | rd |
The alsl.w/wu/d instructions shift the value in rj left by sa2+1, add the value in rk,
and write the result to rd.
So that programmers can write the desired shift amount directly, the assembly operand
is sa2+1, while the instruction encoding stores sa2.
For example:
Go assembly | instruction encoding
ALSLV $4, R4, R5, R6 | 002d9486
*/
package loong64
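
The worked example can be sanity-checked directly from the field layout above (alsl.d opcode 0x16<<17, then sa2, rk, rj, rd). A minimal sketch, assuming a hypothetical helper encodeALSL that mirrors the OP_2IRRR packing (not part of the assembler):

package main

import "fmt"

// encodeALSL packs | opcode 31~17 | sa2 16~15 | rk 14~10 | rj 9~5 | rd 4~0 |.
// The assembly operand imm is sa2+1, so the encoding stores imm-1.
func encodeALSL(op, imm, rj, rk, rd uint32) uint32 {
	sa2 := imm - 1
	return op | (sa2&0x3)<<15 | (rk&0x1F)<<10 | (rj&0x1F)<<5 | (rd & 0x1F)
}

func main() {
	const alslD = 0x16 << 17 // alsl.d opcode, as in opirrr above
	// ALSLV $4, R4, R5, R6  ->  alsl.d r6, r4, r5 with sa2 = 3
	fmt.Printf("%08x\n", encodeALSL(alslD, 4, 4, 5, 6)) // prints 002d9486
}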

Some files were not shown because too many files have changed in this diff.