[dev.simd] all: merge master (7a1679d) into dev.simd

Conflicts:

- src/cmd/compile/internal/amd64/ssa.go
- src/cmd/compile/internal/ssa/rewriteAMD64.go
- src/internal/buildcfg/exp.go
- src/internal/cpu/cpu.go
- src/internal/cpu/cpu_x86.go
- src/internal/goexperiment/flags.go

Merge List:

+ 2025-08-04 7a1679d7ae cmd/compile: move s390x over to new bounds check strategy
+ 2025-08-04 95693816a5 cmd/compile: move riscv64 over to new bounds check strategy
+ 2025-08-04 d7bd7773eb go/parser: remove safePos
+ 2025-08-04 4b6cbc377f cmd/cgo/internal/test: use (syntactic) constant for C array bound
+ 2025-08-03 b2960e3580 cmd/internal/obj/loong64: add {V,XV}{BITCLR/BITSET/BITREV}[I].{B/H/W/D} instructions support
+ 2025-08-03 abeeef1c08 cmd/compile/internal/test: fix typo in comments
+ 2025-08-03 d44749b65b cmd/internal/obj/loong64: add [X]VLDREPL.{B/H/W/D} instructions support
+ 2025-08-03 d6beda863e runtime: add reference to debugPinnerV1
+ 2025-08-01 4ab1aec007 cmd/go: modload should use a read-write lock to improve concurrency
+ 2025-08-01 e666972a67 runtime: deduplicate Windows stdcall
+ 2025-08-01 ef40549786 runtime,syscall: move loadlibrary and getprocaddress to syscall
+ 2025-08-01 336931a4ca cmd/go: use os.Rename to move files on Windows
+ 2025-08-01 eef5f8d930 cmd/compile: enforce that locals are always accessed with SP base register
+ 2025-08-01 e071617222 cmd/compile: optimize multiplication rules on loong64
+ 2025-07-31 eb7f515c4d cmd/compile: use generated loops instead of DUFFZERO on amd64
+ 2025-07-31 c0ee2fd4e3 cmd/go: explicitly reject module paths "go" and "toolchain"
+ 2025-07-30 a4d99770c0 runtime/metrics: add cleanup and finalizer queue metrics
+ 2025-07-30 70a2ff7648 runtime: add cgo call benchmark
+ 2025-07-30 69338a335a cmd/go/internal/gover: fix ModIsPrerelease for toolchain versions
+ 2025-07-30 cedf63616a cmd/compile: add floating point min/max intrinsics on s390x
+ 2025-07-30 82a1921c3b all: remove redundant Swiss prefixes
+ 2025-07-30 2ae059ccaf all: remove GOEXPERIMENT=swissmap
+ 2025-07-30 cc571dab91 cmd/compile: deduplicate instructions when rewrite func results
+ 2025-07-30 2174a7936c crypto/tls: use standard chacha20-poly1305 cipher suite names
+ 2025-07-30 8330fb48a6 cmd/compile: move mips32 over to new bounds check strategy
+ 2025-07-30 9f9d7b50e8 cmd/compile: move mips64 over to new bounds check strategy
+ 2025-07-30 5216fd570e cmd/compile: move loong64 over to new bounds check strategy
+ 2025-07-30 89a0af86b8 cmd/compile: allow ops to specify clobbering input registers
+ 2025-07-30 5e94d72158 cmd/compile: simplify zerorange on arm64
+ 2025-07-30 8cd85e602a cmd/compile: check domination of loop return in both controls
+ 2025-07-30 cefaed0de0 reflect: fix noswiss builder
+ 2025-07-30 3aa1b00081 regexp: fix compiling alternate patterns of different fold case literals
+ 2025-07-30 b1e933d955 cmd/compile: avoid extending when already sufficiently masked on loong64
+ 2025-07-29 880ca333d7 cmd/compile: removing log2uint32 function
+ 2025-07-29 1513661dc3 cmd/compile: simplify logX implementations
+ 2025-07-29 bd94ae8903 cmd/compile: use unsigned power-of-two detector for unsigned mod
+ 2025-07-29 f3582fc80e cmd/compile: add unsigned power-of-two detector
+ 2025-07-29 f7d167fe71 internal/abi: move direct/indirect flag from Kind to TFlag
+ 2025-07-29 e0b07dc22e os/exec: fix incorrect expansion of "", "." and ".." in LookPath
+ 2025-07-29 25816d401c internal/goexperiment: delete RangeFunc goexperiment
+ 2025-07-29 7961bf71f8 internal/goexperiment: delete CacheProg goexperiment
+ 2025-07-29 e15a14c4dd sync: remove synchashtriemap GOEXPERIMENT
+ 2025-07-29 7dccd6395c cmd/compile: move arm32 over to new bounds check strategy
+ 2025-07-29 d79405a344 runtime: only deduct assist credit for arenas during GC
+ 2025-07-29 19a086f716 cmd/go/internal/telemetrystats: count goexperiments
+ 2025-07-29 aa95ab8215 image: fix formatting of godoc link
+ 2025-07-29 4c854b7a3e crypto/elliptic: change a variable name that have the same name as keywords
+ 2025-07-28 b10eb1d042 cmd/compile: simplify zerorange on amd64
+ 2025-07-28 f8eae7a3c3 os/user: fix tests to pass on non-english Windows
+ 2025-07-28 0984264471 internal/poll: remove msg field from Windows' poll.operation
+ 2025-07-28 d7b4114346 internal/poll: remove rsan field from Windows' poll.operation
+ 2025-07-28 361b1ab41f internal/poll: remove sa field from Windows' poll.operation
+ 2025-07-28 9b6bd64e46 internal/poll: remove qty and flags fields from Windows' poll.operation
+ 2025-07-28 cd3655a824 internal/runtime/maps: fix spelling errors in comments
+ 2025-07-28 d5dc36af45 runtime: remove openbsd/mips64 related code
+ 2025-07-28 64ba72474d errors: omit redundant nil check in type assertion for Join
+ 2025-07-28 e151db3e06 all: omit unnecessary type conversions
+ 2025-07-28 4569255f8c cmd/compile: cleanup SelectN rules by indexing into args
+ 2025-07-28 94645d2413 cmd/compile: rewrite cmov(x, x, cond) into x
+ 2025-07-28 10c5cf68d4 net/http: add proper panic message
+ 2025-07-28 46b5839231 test/codegen: fix failing condmove wasm tests
+ 2025-07-28 98f301cf68 runtime,syscall: move SyscallX implementations from runtime to syscall
+ 2025-07-28 c7ed3a1c5a internal/runtime/syscall/windows: factor out code from runtime
+ 2025-07-28 e81eac19d3 hash/crc32: fix incorrect checksums with avx512+race
+ 2025-07-25 6fbad4be75 cmd/compile: remove no-longer-necessary call to calculateDepths
+ 2025-07-25 5045fdd8ff cmd/compile: fix containsUnavoidableCall computation
+ 2025-07-25 d28b27cd8e go/types, types2: use nil to represent incomplete explicit aliases
+ 2025-07-25 7b53d8d06e cmd/compile/internal/types2: add loaded state between loader calls and constraint expansion
+ 2025-07-25 374e3be2eb os/user: user random name for the test user account
+ 2025-07-25 1aa154621d runtime: rename scanobject to scanObject
+ 2025-07-25 41b429881a runtime: duplicate scanobject in greentea and non-greentea files
+ 2025-07-25 aeb256e98a cmd/compile: remove unused arg from gorecover
+ 2025-07-25 08376e1a9c runtime: iterate through inlinings when processing recover()
+ 2025-07-25 c76c3abc54 encoding/json: fix truncated Token error regression in goexperiment.jsonv2
+ 2025-07-25 ebdbfccd98 encoding/json/jsontext: preserve buffer capacity in Encoder.Reset
+ 2025-07-25 91c4f0ccd5 reflect: avoid a bounds check in stack-constrained code
+ 2025-07-24 3636ced112 encoding/json: fix extra data regression under goexperiment.jsonv2
+ 2025-07-24 a6eec8bdc7 encoding/json: reduce error text regressions under goexperiment.jsonv2
+ 2025-07-24 0fa88dec1e time: remove redundant uint32 conversion in split
+ 2025-07-24 ada30b8248 internal/buildcfg: add ability to get GORISCV64 variable in GOGOARCH
+ 2025-07-24 6f6c6c5782 cmd/internal/obj: rip out argp adjustment for wrapper frames
+ 2025-07-24 7b50024330 runtime: detect successful recovers differently
+ 2025-07-24 7b9de668bd unicode/utf8: skip ahead during ascii runs in Valid/ValidString
+ 2025-07-24 076eae436e cmd/compile: move amd64 and 386 over to new bounds check strategy
+ 2025-07-24 f703dc5bef cmd/compile: add missing StringLen rule in prove
+ 2025-07-24 394d0bee8d cmd/compile: move arm64 over to new bounds check strategy
+ 2025-07-24 3024785b92 cmd/compile,runtime: remember idx+len for bounds check failure with less code
+ 2025-07-24 741a19ab41 runtime: move bounds check constants to internal/abi
+ 2025-07-24 ce05ad448f cmd/compile: rewrite condselects into doublings and halvings
+ 2025-07-24 fcd28070fe cmd/compile: add opt branchelim to rewrite some CondSelect into math
+ 2025-07-24 f32cf8e4b0 cmd/compile: learn transitive proofs for safe unsigned subs
+ 2025-07-24 d574856482 cmd/compile: learn transitive proofs for safe negative signed adds
+ 2025-07-24 1a72920f09 cmd/compile: learn transitive proofs for safe positive signed adds
+ 2025-07-24 e5f202bb60 cmd/compile: learn transitive proofs for safe unsigned adds
+ 2025-07-24 bd80f74bc1 cmd/compile: fold shift through AND for slice operations
+ 2025-07-24 5c45fe1385 internal/runtime/syscall: rename to internal/runtime/syscall/linux
+ 2025-07-24 592c2db868 cmd/compile: improve loopRotate to handle nested loops
+ 2025-07-24 dcb479c2f9 cmd/compile: optimize slice bounds checking with SUB/SUBconst comparisons
+ 2025-07-24 f11599b0b9 internal/poll: remove handle field from Windows' poll.operation
+ 2025-07-24 f7432e0230 internal/poll: remove fd field from Windows' poll.operation
+ 2025-07-24 e84ed38641 runtime: add benchmark for small-size memmory operation
+ 2025-07-24 18dbe5b941 hash/crc32: add AVX512 IEEE CRC32 calculation
+ 2025-07-24 c641900f72 cmd/compile: prefer base.Fatalf to panic in dwarfgen
+ 2025-07-24 d71d8aeafd cmd/internal/obj/s390x: add MVCLE instruction
+ 2025-07-24 b6cf1d94dc runtime: optimize memclr on mips64x
+ 2025-07-24 a8edd99479 runtime: improvement in memclr for s390x
+ 2025-07-24 bd04f65511 internal/runtime/exithook: fix a typo
+ 2025-07-24 5c8624a396 cmd/internal/goobj: make error output clear
+ 2025-07-24 44d73dfb4e cmd/go/internal/doc: clean up after merge with cmd/internal/doc
+ 2025-07-24 bd446662dd cmd/internal/doc: merge with cmd/go/internal/doc
+ 2025-07-24 da8b50c830 cmd/doc: delete
+ 2025-07-24 6669aa3b14 runtime: randomize heap base address
+ 2025-07-24 26338a7f69 cmd/compile: use better fatal message for staticValue1
+ 2025-07-24 8587ba272e cmd/cgo: compare malloc return value to NULL instead of literal 0
+ 2025-07-24 cae45167b7 go/types, types2: better error messages for certain type mismatches
+ 2025-07-24 2ddf542e4c cmd/compile: use ,ok return idiom for sparsemap.get
+ 2025-07-24 6505fcbd0a cmd/compile: use generics for sparse map
+ 2025-07-24 14f5eb7812 cmd/api: rerun updategolden
+ 2025-07-24 52b6d7f67a runtime: drop NetBSD kernel bug sysmon workaround fixed in NetBSD 9.2
+ 2025-07-24 1ebebf1cc1 cmd/go: clean should respect workspaces
+ 2025-07-24 6536a93547 encoding/json/jsontext: preserve buffer capacity in Decoder.Reset
+ 2025-07-24 efc37e97c0 cmd/go: always return the cached path from go tool -n
+ 2025-07-23 98a031193b runtime: check TestUsingVDSO ExitError type assertion
+ 2025-07-23 6bb42997c8 doc/next: initialize
+ 2025-07-23 2696a11a97 internal/goversion: update Version to 1.26
+ 2025-07-23 489868f776 cmd/link: scope test to linux & net.sendFile
+ 2025-07-22 71c2bf5513 cmd/compile: fix loclist for heap return vars without optimizations
+ 2025-07-22 c74399e7f5 net: correct comment for ListenConfig.ListenPacket
+ 2025-07-22 4ed9943b26 all: go fmt
+ 2025-07-22 1aaf7422f1 cmd/internal/objabi: remove redundant word in comment
+ 2025-07-21 d5ec0815e6 runtime: relax TestMemoryLimitNoGCPercent a bit
+ 2025-07-21 f7cc61e7d7 cmd/compile: for arm64 epilog, do SP increment with a single instruction
+ 2025-07-21 5dac42363b runtime: fix asan wrapper for riscv64
+ 2025-07-21 e5502e0959 cmd/go: check subcommand properties
+ 2025-07-19 2363897932 cmd/internal/obj: enable got pcrel itype in fips140 for riscv64
+ 2025-07-19 e32255fcc0 cmd/compile/internal/ssa: restrict architectures for TestDebugLines_74576
+ 2025-07-18 0451816430 os: revert the use of AddCleanup to close files and roots
+ 2025-07-18 34b70684ba go/types: infer correct type for y in append(bytes, y...)
+ 2025-07-17 66536242fc cmd/compile/internal/escape: improve DWARF .debug_line numbering for literal rewriting optimizations
+ 2025-07-16 385000b004 runtime: fix idle time double-counting bug
+ 2025-07-16 f506ad2644 cmd/compile/internal/escape: speed up analyzing some functions with many closures
+ 2025-07-16 9c507e7942 cmd/link, runtime: on Wasm, put only function index in method table and func table
+ 2025-07-16 9782dcfd16 runtime: use 32-bit function index on Wasm
+ 2025-07-16 c876bf9346 cmd/internal/obj/wasm: use 64-bit instructions for indirect calls
+ 2025-07-15 b4309ece66 cmd/internal/doc: upgrade godoc pkgsite to 01b046e
+ 2025-07-15 75a19dbcd7 runtime: use memclrNoHeapPointers to clear inline mark bits
+ 2025-07-15 6d4a91c7a5 runtime: only clear inline mark bits on span alloc if necessary
+ 2025-07-15 0c6296ab12 runtime: have mergeInlineMarkBits also clear the inline mark bits
+ 2025-07-15 397d2117ec runtime: merge inline mark bits with gcmarkBits 8 bytes at a time
+ 2025-07-15 7dceabd3be runtime/maps: fix typo in group.go comment (instrinsified -> intrinsified)
+ 2025-07-15 d826bf4d74 os: remove useless error check
+ 2025-07-14 bb07e55aff runtime: expand GOMAXPROCS documentation
+ 2025-07-14 9159cd4ec6 encoding/json: decompose legacy options
+ 2025-07-14 c6556b8eb3 encoding/json/v2: add security section to doc
+ 2025-07-11 6ebb5f56d9 runtime: gofmt after CL 643897 and CL 662455
+ 2025-07-11 1e48ca7020 encoding/json: remove legacy option to EscapeInvalidUTF8
+ 2025-07-11 a0a99cb22b encoding/json/v2: report wrapped io.ErrUnexpectedEOF
+ 2025-07-11 9d04122d24 crypto/rsa: drop contradictory promise to keep PublicKey modulus secret
+ 2025-07-11 1ca23682dd crypto/rsa: fix documentation formatting
+ 2025-07-11 4bc3373c8e runtime: turn off large memmove tests under asan/msan

Change-Id: I1e32d964eba770b85421efb86b305a2242f24466
Author: Cherry Mui
Date: 2025-08-04 15:07:05 -04:00
Commit: 775fb52745
526 changed files with 12758 additions and 15109 deletions

doc/next/1-intro.md

@@ -0,0 +1,8 @@
<style>
main ul li { margin: 0.5em 0; }
</style>
## DRAFT RELEASE NOTES — Introduction to Go 1.26 {#introduction}
**Go 1.26 is not yet released. These are work-in-progress release notes.
Go 1.26 is expected to be released in February 2026.**

doc/next/2-language.md

@@ -0,0 +1,3 @@
## Changes to the language {#language}

doc/next/3-tools.md

@@ -0,0 +1,11 @@
## Tools {#tools}
### Go command {#go-command}
<!-- go.dev/issue/74667 -->
`cmd/doc` and `go tool doc` have been deleted. `go doc` can be used as
a replacement for `go tool doc`: it takes the same flags and arguments and
has the same behavior.
### Cgo {#cgo}

doc/next/4-runtime.md

@@ -0,0 +1 @@
## Runtime {#runtime}

doc/next/5-toolchain.md

@@ -0,0 +1,7 @@
## Compiler {#compiler}
## Assembler {#assembler}
## Linker {#linker}


@@ -0,0 +1,2 @@
## Standard library {#library}


@@ -0,0 +1,10 @@
### Minor changes to the library {#minor_library_changes}
#### go/types
The `Var.Kind` method returns an enumeration of type `VarKind` that
classifies the variable (package-level, local, receiver, parameter,
result, or struct field). See issue #70250.
Callers of `NewVar` or `NewParam` are encouraged to call `Var.SetKind`
to ensure that this attribute is set correctly in all cases.
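
A minimal sketch of the API described above (assuming the `VarKind`
constant names such as `types.LocalVar` from the issue):

	package main

	import (
		"fmt"
		"go/token"
		"go/types"
	)

	func main() {
		pkg := types.NewPackage("example.com/p", "p")

		// NewVar cannot know how a variable will be used, so the caller
		// sets the kind explicitly, as recommended above.
		v := types.NewVar(token.NoPos, pkg, "x", types.Typ[types.Int])
		v.SetKind(types.LocalVar)

		fmt.Println(v.Kind() == types.LocalVar) // true
	}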


@@ -0,0 +1 @@
API changes and other small changes to the standard library go here.

doc/next/7-ports.md

@@ -0,0 +1,2 @@
## Ports {#ports}


@@ -693,14 +693,14 @@ func bmIndexRuneUnicode(rt *unicode.RangeTable, needle rune) func(b *testing.B,
for _, r16 := range rt.R16 {
for r := rune(r16.Lo); r <= rune(r16.Hi); r += rune(r16.Stride) {
if r != needle {
rs = append(rs, rune(r))
rs = append(rs, r)
}
}
}
for _, r32 := range rt.R32 {
for r := rune(r32.Lo); r <= rune(r32.Hi); r += rune(r32.Stride) {
if r != needle {
rs = append(rs, rune(r))
rs = append(rs, r)
}
}
}


@@ -1,6 +1,6 @@
pkg p1, const A //deprecated
pkg p1, const A = 1
pkg p1, const A ideal-int
pkg p1, const A //deprecated
pkg p1, const A64 = 1
pkg p1, const A64 int64
pkg p1, const AIsLowerA = 11
@@ -25,8 +25,8 @@ pkg p1, func TakesFunc(func(int) int)
pkg p1, method (*B) JustOnB()
pkg p1, method (*B) OnBothTandBPtr()
pkg p1, method (*Embedded) OnEmbedded()
pkg p1, method (*S2) SMethod(int8, int16, int64)
pkg p1, method (*S2) SMethod //deprecated
pkg p1, method (*S2) SMethod(int8, int16, int64)
pkg p1, method (*T) JustOnT()
pkg p1, method (*T) OnBothTandBPtr()
pkg p1, method (B) OnBothTandBVal()
@@ -53,8 +53,8 @@ pkg p1, type Error interface { Error, Temporary }
pkg p1, type Error interface, Error() string
pkg p1, type Error interface, Temporary() bool
pkg p1, type FuncType func(int, int, string) (*B, error)
pkg p1, type I interface, Get(string) int64
pkg p1, type I interface, Get //deprecated
pkg p1, type I interface, Get(string) int64
pkg p1, type I interface, GetNamed(string) int64
pkg p1, type I interface, Name() string
pkg p1, type I interface, PackageTwoMeth()
@@ -63,9 +63,9 @@ pkg p1, type I interface, unexported methods
pkg p1, type MyInt int
pkg p1, type Namer interface { Name }
pkg p1, type Namer interface, Name() string
pkg p1, type Private //deprecated
pkg p1, type Private interface, X()
pkg p1, type Private interface, unexported methods
pkg p1, type Private //deprecated
pkg p1, type Public interface { X, Y }
pkg p1, type Public interface, X()
pkg p1, type Public interface, Y()
@@ -84,8 +84,8 @@ pkg p1, type TPtrExported struct
pkg p1, type TPtrExported struct, embedded *Embedded
pkg p1, type TPtrUnexported struct
pkg p1, type Time struct
pkg p1, type URL struct
pkg p1, type URL //deprecated
pkg p1, type URL struct
pkg p1, var Byte uint8
pkg p1, var ByteConv []uint8
pkg p1, var ByteFunc func(uint8) int32
@@ -97,8 +97,8 @@ pkg p1, var StrConv string
pkg p1, var V string
pkg p1, var V1 uint64
pkg p1, var V2 p2.Twoer
pkg p1, var VError Error
pkg p1, var VError //deprecated
pkg p1, var VError Error
pkg p1, var X I
pkg p1, var X0 int64
pkg p1, var Y int


@@ -1,8 +1,7 @@
pkg p2, func F() string
pkg p2, func F //deprecated
pkg p2, func F() string
pkg p2, func G() Twoer
pkg p2, func NewError(string) error
pkg p2, type Twoer interface { PackageTwoMeth }
pkg p2, type Twoer interface, PackageTwoMeth()
pkg p2, type Twoer interface, PackageTwoMeth //deprecated
pkg p2, type Twoer interface, PackageTwoMeth()


@@ -1,6 +1,6 @@
pkg p4, func NewPair[$0 interface{ M }, $1 interface{ ~int }]($0, $1) Pair[$0, $1]
pkg p4, method (Pair[$0, $1]) Second() $1
pkg p4, method (Pair[$0, $1]) First() $0
pkg p4, type Pair[$0 interface{ M }, $1 interface{ ~int }] struct
pkg p4, func Clone[$0 interface{ ~[]$1 }, $1 interface{}]($0) $0
pkg p4, func Clone //deprecated
pkg p4, func Clone[$0 interface{ ~[]$1 }, $1 interface{}]($0) $0
pkg p4, func NewPair[$0 interface{ M }, $1 interface{ ~int }]($0, $1) Pair[$0, $1]
pkg p4, method (Pair[$0, $1]) First() $0
pkg p4, method (Pair[$0, $1]) Second() $1
pkg p4, type Pair[$0 interface{ M }, $1 interface{ ~int }] struct


@@ -510,6 +510,16 @@ lable2:
VMOVQ V3.W[1], V7.W4 // 67e4f772
VMOVQ V4.V[0], V6.V2 // 86f0f772
// Load data from memory and broadcast to each element of a vector register: VMOVQ offset(Rj), <Vd>.<T>
VMOVQ (R4), V0.B16 // 80008030
VMOVQ 1(R4), V1.H8 // 81044030
VMOVQ 2(R4), V2.W4 // 82082030
VMOVQ 3(R4), V3.V2 // 830c1030
XVMOVQ (R4), X0.B32 // 80008032
XVMOVQ 1(R4), X1.H16 // 81044032
XVMOVQ 2(R4), X2.W8 // 82082032
XVMOVQ 3(R4), X3.V4 // 830c1032
// VSEQ{B,H,W,V}, XVSEQ{B,H,W,V} instruction
VSEQB V1, V2, V3 // 43040070
VSEQH V1, V2, V3 // 43840070
@@ -1035,3 +1045,53 @@ lable2:
PRELD (R4), $0 // 8000c02a
PRELD -1(R4), $8 // 88fcff2a
PRELD 8(R4), $31 // 9f20c02a
// [X]{VBITCLR/VBITSET/VBITREV}{B,H,W,V} instructions
VBITCLRB V1, V2, V3 // 43040c71
VBITCLRH V1, V2, V3 // 43840c71
VBITCLRW V1, V2, V3 // 43040d71
VBITCLRV V1, V2, V3 // 43840d71
VBITSETB V1, V2, V3 // 43040e71
VBITSETH V1, V2, V3 // 43840e71
VBITSETW V1, V2, V3 // 43040f71
VBITSETV V1, V2, V3 // 43840f71
VBITREVB V1, V2, V3 // 43041071
VBITREVH V1, V2, V3 // 43841071
VBITREVW V1, V2, V3 // 43041171
VBITREVV V1, V2, V3 // 43841171
XVBITCLRB X3, X2, X1 // 410c0c75
XVBITCLRH X3, X2, X1 // 418c0c75
XVBITCLRW X3, X2, X1 // 410c0d75
XVBITCLRV X3, X2, X1 // 418c0d75
XVBITSETB X3, X2, X1 // 410c0e75
XVBITSETH X3, X2, X1 // 418c0e75
XVBITSETW X3, X2, X1 // 410c0f75
XVBITSETV X3, X2, X1 // 418c0f75
XVBITREVB X3, X2, X1 // 410c1075
XVBITREVH X3, X2, X1 // 418c1075
XVBITREVW X3, X2, X1 // 410c1175
XVBITREVV X3, X2, X1 // 418c1175
VBITCLRB $7, V2, V3 // 433c1073
VBITCLRH $15, V2, V3 // 437c1073
VBITCLRW $31, V2, V3 // 43fc1073
VBITCLRV $63, V2, V3 // 43fc1173
VBITSETB $7, V2, V3 // 433c1473
VBITSETH $15, V2, V3 // 437c1473
VBITSETW $31, V2, V3 // 43fc1473
VBITSETV $63, V2, V3 // 43fc1573
VBITREVB $7, V2, V3 // 433c1873
VBITREVH $15, V2, V3 // 437c1873
VBITREVW $31, V2, V3 // 43fc1873
VBITREVV $63, V2, V3 // 43fc1973
XVBITCLRB $7, X2, X1 // 413c1077
XVBITCLRH $15, X2, X1 // 417c1077
XVBITCLRW $31, X2, X1 // 41fc1077
XVBITCLRV $63, X2, X1 // 41fc1177
XVBITSETB $7, X2, X1 // 413c1477
XVBITSETH $15, X2, X1 // 417c1477
XVBITSETW $31, X2, X1 // 41fc1477
XVBITSETV $63, X2, X1 // 41fc1577
XVBITREVB $7, X2, X1 // 413c1877
XVBITREVH $15, X2, X1 // 417c1877
XVBITREVW $31, X2, X1 // 41fc1877
XVBITREVV $63, X2, X1 // 41fc1977


@@ -263,10 +263,15 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
NC $8, (R15), n-8(SP) // d407f010f000
OC $8, (R15), n-8(SP) // d607f010f000
MVC $8, (R15), n-8(SP) // d207f010f000
MVC $256, 8192(R1), 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000
MVCIN $8, (R15), n-8(SP) // e807f010f000
CLC $8, (R15), n-8(SP) // d507f000f010
XC $256, -8(R15), -8(R15) // b90400afc2a8fffffff8d7ffa000a000
MVC $256, 8192(R1), 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000
MVCLE 0, R4, R6 // a8640000
MVCLE 4095, R4, R6 // a8640fff
MVCLE $4095, R4, R6 // a8640fff
MVCLE (R3), R4, R6 // a8643000
MVCLE 10(R3), R4, R6 // a864300a
CMP R1, R2 // b9200012
CMP R3, $32767 // a73f7fff
@@ -535,6 +540,18 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
VSTRCZBS V18, V20, V22, V24 // e78240306f8a
VSTRCZHS V18, V20, V22, V24 // e78241306f8a
VSTRCZFS V18, V20, V22, V24 // e78242306f8a
VFMAXSB $1, V2, V3, V4 // e742301020ef
WFMAXSB $2, V5, V6, V7 // e775602820ef
WFMAXSB $2, F5, F6, F7 // e775602820ef
VFMAXDB $3, V8, V9, V10 // e7a8903030ef
WFMAXDB $4, V11, V12, V13 // e7dbc04830ef
WFMAXDB $4, F11, F12, F13 // e7dbc04830ef
VFMINSB $7, V14, V15, V16 // e70ef07028ee
WFMINSB $8, V17, V18, V19 // e73120882eee
WFMINSB $8, F1, F2, F3 // e731208820ee
VFMINDB $9, V20, V21, V22 // e76450903eee
WFMINDB $10, V23, V24, V25 // e79780a83eee
WFMINDB $10, F7, F8, F9 // e79780a830ee
RET
RET foo(SB)


@@ -245,7 +245,7 @@ static void *thread(void *p) {
return NULL;
}
void testSendSIG() {
const int N = 20;
enum { N = 20 };
int i;
pthread_t tid[N];
for (i = 0; i < N; i++) {


@@ -1812,7 +1812,7 @@ void _cgoPREFIX_Cfunc__Cmalloc(void *v) {
void *ret;
_cgo_tsan_acquire();
ret = malloc(a->p0);
if (ret == 0 && a->p0 == 0) {
if (ret == NULL && a->p0 == 0) {
ret = malloc(1);
}
a->r1 = ret;


@@ -5,113 +5,23 @@
package amd64
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/x86"
)
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
const (
dzBlocks = 16 // number of MOV/ADD blocks
dzBlockLen = 4 // number of clears per block
dzBlockSize = 23 // size of instructions in a single block
dzMovSize = 5 // size of single MOV instruction w/ offset
dzLeaqSize = 4 // size of single LEAQ instruction
dzClearStep = 16 // number of bytes cleared by each MOV instruction
dzClearLen = dzClearStep * dzBlockLen // bytes cleared by one block
dzSize = dzBlocks * dzBlockSize
)
// dzOff returns the offset for a jump into DUFFZERO.
// b is the number of bytes to zero.
func dzOff(b int64) int64 {
off := int64(dzSize)
off -= b / dzClearLen * dzBlockSize
tailLen := b % dzClearLen
if tailLen >= dzClearStep {
off -= dzLeaqSize + dzMovSize*(tailLen/dzClearStep)
}
return off
}
// duffzeroDI returns the pre-adjustment to DI for a call to DUFFZERO.
// b is the number of bytes to zero.
func dzDI(b int64) int64 {
tailLen := b % dzClearLen
if tailLen < dzClearStep {
return 0
}
tailSteps := tailLen / dzClearStep
return -dzClearStep * (dzBlockLen - tailSteps)
}
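
Plugging a size into the two helpers above makes the arithmetic concrete
(a quick self-contained check; the constants are the ones defined in this
file before they move to ssa.go):

	package main

	import "fmt"

	const (
		dzBlockLen  = 4
		dzBlockSize = 23
		dzMovSize   = 5
		dzLeaqSize  = 4
		dzClearStep = 16
		dzClearLen  = dzClearStep * dzBlockLen // 64 bytes cleared per block
		dzSize      = 16 * dzBlockSize         // 368 bytes of DUFFZERO body
	)

	func main() {
		// 96 bytes = one full 64-byte block plus a 32-byte (2-MOV) tail.
		b := int64(96)
		off := int64(dzSize) - b/dzClearLen*dzBlockSize      // 368 - 23 = 345
		tail := b % dzClearLen                               // 32
		off -= dzLeaqSize + dzMovSize*(tail/dzClearStep)     // 345 - 14 = 331
		di := -dzClearStep * (dzBlockLen - tail/dzClearStep) // -16 * 2 = -32
		fmt.Println(off, di) // 331 -32: enter 2 MOVs early, DI biased back 32
	}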
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, state *uint32) *obj.Prog {
const (
r13 = 1 << iota // if R13 is already zeroed.
)
if cnt == 0 {
return p
if cnt%8 != 0 {
panic("zeroed region not aligned")
}
if cnt == 8 {
for cnt >= 16 {
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off)
off += 16
cnt -= 16
}
if cnt != 0 {
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off)
} else if cnt <= int64(8*types.RegSize) {
for i := int64(0); i < cnt/16; i++ {
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+i*16)
}
if cnt%16 != 0 {
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_SP, off+cnt-int64(16))
}
} else if cnt <= int64(128*types.RegSize) {
// Save DI to r12. With the amd64 Go register abi, DI can contain
// an incoming parameter, whereas R12 is always scratch.
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
// Emit duffzero call
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off+dzDI(cnt), obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_ADDR, 0, dzOff(cnt))
p.To.Sym = ir.Syms.Duffzero
if cnt%16 != 0 {
p = pp.Append(p, x86.AMOVUPS, obj.TYPE_REG, x86.REG_X15, 0, obj.TYPE_MEM, x86.REG_DI, -int64(8))
}
// Restore DI from r12
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
} else {
// When the register ABI is in effect, at this point in the
// prolog we may have live values in all of RAX,RDI,RCX. Save
// them off to registers before the REPSTOSQ below, then
// restore. Note that R12 and R13 are always available as
// scratch regs; here we also use R15 (this is safe to do
// since there won't be any globals accessed in the prolog).
// See rewriteToUseGot() in obj6.go for more on r15 use.
// Save rax/rdi/rcx
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_DI, 0, obj.TYPE_REG, x86.REG_R12, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_R13, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_CX, 0, obj.TYPE_REG, x86.REG_R15, 0)
// Set up the REPSTOSQ and kick it off.
p = pp.Append(p, x86.AXORL, obj.TYPE_REG, x86.REG_AX, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_CONST, 0, cnt/int64(types.RegSize), obj.TYPE_REG, x86.REG_CX, 0)
p = pp.Append(p, leaptr, obj.TYPE_MEM, x86.REG_SP, off, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AREP, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
p = pp.Append(p, x86.ASTOSQ, obj.TYPE_NONE, 0, 0, obj.TYPE_NONE, 0, 0)
// Restore rax/rdi/rcx
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R12, 0, obj.TYPE_REG, x86.REG_DI, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R13, 0, obj.TYPE_REG, x86.REG_AX, 0)
p = pp.Append(p, x86.AMOVQ, obj.TYPE_REG, x86.REG_R15, 0, obj.TYPE_REG, x86.REG_CX, 0)
// Record the fact that r13 is no longer zero.
*state &= ^uint32(r13)
}
return p
}


@@ -17,6 +17,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/x86"
"internal/abi"
)
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
@@ -147,6 +148,15 @@ func memIdx(a *obj.Addr, v *ssa.Value) {
// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
const (
dzBlocks = 16 // number of MOV/ADD blocks
dzBlockLen = 4 // number of clears per block
dzBlockSize = 23 // size of instructions in a single block
dzMovSize = 5 // size of single MOV instruction w/ offset
dzLeaqSize = 4 // size of single LEAQ instruction
dzClearStep = 16 // number of bytes cleared by each MOV instruction
)
func duffStart(size int64) int64 {
x, _ := duff(size)
return x
@@ -1001,26 +1011,103 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssagen.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64DUFFZERO:
case ssa.OpAMD64LoweredZero:
if s.ABI != obj.ABIInternal {
// zero X15 manually
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
off := duffStart(v.AuxInt)
adj := duffAdj(v.AuxInt)
var p *obj.Prog
if adj != 0 {
p = s.Prog(x86.ALEAQ)
p.From.Type = obj.TYPE_MEM
p.From.Offset = adj
p.From.Reg = x86.REG_DI
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_DI
ptrReg := v.Args[0].Reg()
n := v.AuxInt
if n < 16 {
v.Fatalf("Zero too small %d", n)
}
p = s.Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_ADDR
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = off
zero16 := func(off int64) {
zero16(s, ptrReg, off)
}
// Generate zeroing instructions.
var off int64
for n >= 16 {
zero16(off)
off += 16
n -= 16
}
if n != 0 {
// use partially overlapped write.
// TODO: n <= 8, use smaller write?
zero16(off + n - 16)
}
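
The "partially overlapped write" is the usual tail trick; in pure-Go terms
it looks like this sketch (which, like the generated code, assumes at
least 16 bytes to zero, as the Fatalf above enforces):

	// zeroRange mirrors the generated sequence: 16-byte stores, then one
	// final 16-byte store that overlaps already-zeroed bytes to cover a
	// 1-15 byte remainder without needing a narrower store.
	func zeroRange(b []byte) {
		var zero [16]byte
		off, n := 0, len(b)
		for n >= 16 {
			copy(b[off:off+16], zero[:])
			off += 16
			n -= 16
		}
		if n != 0 {
			copy(b[off+n-16:off+n], zero[:]) // overlaps the previous chunk
		}
	}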
case ssa.OpAMD64LoweredZeroLoop:
if s.ABI != obj.ABIInternal {
// zero X15 manually
opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
}
ptrReg := v.Args[0].Reg()
countReg := v.RegTmp()
n := v.AuxInt
loopSize := int64(64)
if n < 3*loopSize {
// - a loop count of 0 won't work.
// - a loop count of 1 is useless.
// - a loop count of 2 is a code size ~tie
// 4 instructions to implement the loop
// 4 instructions in the loop body
// vs
// 8 instructions in the straightline code
// Might as well use straightline code.
v.Fatalf("ZeroLoop size too small %d", n)
}
zero16 := func(off int64) {
zero16(s, ptrReg, off)
}
// Put iteration count in a register.
// MOVL $n, countReg
p := s.Prog(x86.AMOVL)
p.From.Type = obj.TYPE_CONST
p.From.Offset = n / loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
cntInit := p
// Zero loopSize bytes starting at ptrReg.
for i := range loopSize / 16 {
zero16(i * 16)
}
// ADDQ $loopSize, ptrReg
p = s.Prog(x86.AADDQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = loopSize
p.To.Type = obj.TYPE_REG
p.To.Reg = ptrReg
// DECL countReg
p = s.Prog(x86.ADECL)
p.To.Type = obj.TYPE_REG
p.To.Reg = countReg
// Jump to first instruction in loop if we're not done yet.
// JNE head
p = s.Prog(x86.AJNE)
p.To.Type = obj.TYPE_BRANCH
p.To.SetTarget(cntInit.Link)
// Multiples of the loop size are now done.
n %= loopSize
// Write any fractional portion.
var off int64
for n >= 16 {
zero16(off)
off += 16
n -= 16
}
if n != 0 {
// Use partially-overlapping write.
// TODO: n <= 8, use smaller write?
zero16(off + n - 16)
}
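
The cutoff in the comment above, written out as a predicate (a sketch;
the lowering rules presumably apply the same threshold when choosing
between LoweredZero and LoweredZeroLoop):

	func useZeroLoop(n int64) bool {
		const loopSize = 64 // one iteration = 4 x 16-byte MOVUPS
		// Below three iterations' worth of bytes, straight-line stores
		// are no larger than 4 loop-setup + 4 loop-body instructions,
		// so the loop only pays off from 192 bytes up.
		return n >= 3*loopSize
	}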
case ssa.OpAMD64DUFFCOPY:
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_ADDR
@@ -1151,12 +1238,91 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpAMD64LoweredPanicBoundsRR, ssa.OpAMD64LoweredPanicBoundsRC, ssa.OpAMD64LoweredPanicBoundsCR, ssa.OpAMD64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpAMD64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - x86.REG_AX)
yIsReg = true
yVal = int(v.Args[1].Reg() - x86.REG_AX)
case ssa.OpAMD64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - x86.REG_AX)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX + int16(yVal)
}
case ssa.OpAMD64LoweredPanicBoundsCR:
yIsReg = true
yVal := int(v.Args[0].Reg() - x86.REG_AX)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX + int16(xVal)
}
case ssa.OpAMD64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(x86.AMOVQ)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = x86.REG_AX + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(int64(2 * types.PtrSize)) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
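
The PCData constant replaces the old per-kind assembly stubs: a single
integer now records which operands were registers and what their values
were. The real bit layout belongs to internal/abi; the packing below is a
hypothetical stand-in meant only to show the shape of what BoundsEncode
produces:

	// boundsEncodeSketch is a hypothetical stand-in for abi.BoundsEncode;
	// the actual field widths and order live in internal/abi.
	func boundsEncodeSketch(code int, signed, xIsReg, yIsReg bool, xVal, yVal int) int {
		bit := func(b bool) int {
			if b {
				return 1
			}
			return 0
		}
		v := code<<1 | bit(signed)
		v = v<<1 | bit(xIsReg)
		v = v<<1 | bit(yIsReg)
		v = v<<5 | xVal // register number or small constant (width assumed)
		v = v<<5 | yVal
		return v
	}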
case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
@@ -1931,6 +2097,17 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in
return p
}
// zero 16 bytes at reg+off.
func zero16(s *ssagen.State, reg int16, off int64) {
// MOVUPS X15, off(ptrReg)
p := s.Prog(x86.AMOVUPS)
p.From.Type = obj.TYPE_REG
p.From.Reg = x86.REG_X15
p.To.Type = obj.TYPE_MEM
p.To.Reg = reg
p.To.Offset = off
}
// XXX maybe make this part of v.Reg?
// On the other hand, it is architecture-specific.
func simdReg(v *ssa.Value) int16 {


@@ -18,6 +18,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm"
"internal/abi"
)
// loadByType returns the load instruction of the given type.
@@ -712,18 +713,167 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_EXTERN
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpARMLoweredPanicBoundsA, ssa.OpARMLoweredPanicBoundsB, ssa.OpARMLoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpARMLoweredPanicBoundsRR, ssa.OpARMLoweredPanicBoundsRC, ssa.OpARMLoweredPanicBoundsCR, ssa.OpARMLoweredPanicBoundsCC,
ssa.OpARMLoweredPanicExtendRR, ssa.OpARMLoweredPanicExtendRC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
extend := false
switch v.Op {
case ssa.OpARMLoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm.REG_R0)
yIsReg = true
yVal = int(v.Args[1].Reg() - arm.REG_R0)
case ssa.OpARMLoweredPanicExtendRR:
extend = true
xIsReg = true
hi := int(v.Args[0].Reg() - arm.REG_R0)
lo := int(v.Args[1].Reg() - arm.REG_R0)
xVal = hi<<2 + lo // encode 2 register numbers
yIsReg = true
yVal = int(v.Args[2].Reg() - arm.REG_R0)
case ssa.OpARMLoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(yVal)
}
case ssa.OpARMLoweredPanicExtendRC:
extend = true
xIsReg = true
hi := int(v.Args[0].Reg() - arm.REG_R0)
lo := int(v.Args[1].Reg() - arm.REG_R0)
xVal = hi<<2 + lo // encode 2 register numbers
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
for yVal == hi || yVal == lo {
yVal++
}
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(yVal)
}
case ssa.OpARMLoweredPanicBoundsCR:
yIsReg = true
yVal := int(v.Args[0].Reg() - arm.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(xVal)
} else {
// Move constant to two registers
extend = true
xIsReg = true
hi := 0
lo := 1
if hi == yVal {
hi = 2
}
if lo == yVal {
lo = 2
}
xVal = hi<<2 + lo
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c >> 32
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(hi)
p = s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(int32(c))
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(lo)
}
case ssa.OpARMLoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
// Move constant to a register
xIsReg = true
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(xVal)
} else {
// Move constant to two registers
extend = true
xIsReg = true
hi := 0
lo := 1
xVal = hi<<2 + lo
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c >> 32
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(hi)
p = s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(int32(c))
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(lo)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 2
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(8) // space used in callee args area by assembly stubs
case ssa.OpARMLoweredPanicExtendA, ssa.OpARMLoweredPanicExtendB, ssa.OpARMLoweredPanicExtendC:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
s.UseArgs(12) // space used in callee args area by assembly stubs
if extend {
p.To.Sym = ir.Syms.PanicExtend
} else {
p.To.Sym = ir.Syms.PanicBounds
}
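
On 32-bit ARM a constant that fits in neither int32 nor uint32 is split
across two registers, matching the hi/lo pair PanicExtend expects. A
quick round-trip check of the arithmetic used above:

	package main

	import "fmt"

	func main() {
		c := int64(-5_000_000_000) // fits in neither int32 nor uint32
		hi := int32(c >> 32)       // what the first MOVW materializes
		lo := int32(c)             // what the second MOVW materializes
		back := int64(hi)<<32 | int64(uint32(lo))
		fmt.Println(back == c) // true
	}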
case ssa.OpARMDUFFZERO:
p := s.Prog(obj.ADUFFZERO)
p.To.Type = obj.TYPE_MEM


@@ -5,9 +5,7 @@
package arm64
import (
"cmd/compile/internal/ir"
"cmd/compile/internal/objw"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
)
@@ -22,47 +20,20 @@ func padframe(frame int64) int64 {
}
func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
if cnt == 0 {
return p
if cnt%8 != 0 {
panic("zeroed region not aligned")
}
if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off+i)
}
} else if cnt <= int64(128*types.PtrSize) {
if cnt%(2*int64(types.PtrSize)) != 0 {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, 8+off)
off += int64(types.PtrSize)
cnt -= int64(types.PtrSize)
}
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REG_R20, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_CONST, 0, 8+off, obj.TYPE_REG, arm64.REG_R20, 0)
p.Reg = arm64.REG_R20
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = 4 * (64 - cnt/(2*int64(types.PtrSize)))
} else {
// Not using REGTMP, so this is async preemptible (async preemption clobbers REGTMP).
// We are at the function entry, where no register is live, so it is okay to clobber
// other registers
const rtmp = arm64.REG_R20
p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, 8+off-8, obj.TYPE_REG, rtmp, 0)
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGSP, 0, obj.TYPE_REG, arm64.REGRT1, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT1, 0)
p.Reg = arm64.REGRT1
p = pp.Append(p, arm64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, rtmp, 0)
p = pp.Append(p, arm64.AADD, obj.TYPE_REG, rtmp, 0, obj.TYPE_REG, arm64.REGRT2, 0)
p.Reg = arm64.REGRT1
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGRT1, int64(types.PtrSize))
p.Scond = arm64.C_XPRE
p1 := p
p = pp.Append(p, arm64.ACMP, obj.TYPE_REG, arm64.REGRT1, 0, obj.TYPE_NONE, 0, 0)
p.Reg = arm64.REGRT2
p = pp.Append(p, arm64.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
p.To.SetTarget(p1)
off += 8 // return address was ignored in offset calculation
for cnt >= 16 && off < 512 {
p = pp.Append(p, arm64.ASTP, obj.TYPE_REGREG, arm64.REGZERO, arm64.REGZERO, obj.TYPE_MEM, arm64.REGSP, off)
off += 16
cnt -= 16
}
for cnt != 0 {
p = pp.Append(p, arm64.AMOVD, obj.TYPE_REG, arm64.REGZERO, 0, obj.TYPE_MEM, arm64.REGSP, off)
off += 8
cnt -= 8
}
return p
}


@@ -16,6 +16,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
"internal/abi"
)
// loadByType returns the load instruction of the given type.
@@ -1122,12 +1123,91 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpARM64LoweredPanicBoundsA, ssa.OpARM64LoweredPanicBoundsB, ssa.OpARM64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpARM64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm64.REG_R0)
yIsReg = true
yVal = int(v.Args[1].Reg() - arm64.REG_R0)
case ssa.OpARM64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm64.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(yVal)
}
case ssa.OpARM64LoweredPanicBoundsCR:
yIsReg = true
yVal := int(v.Args[0].Reg() - arm64.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
if xVal == yVal {
xVal = 1
}
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(xVal)
}
case ssa.OpARM64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpARM64LoweredNilCheck:
// Issue a load which will fault if arg is nil.
p := s.Prog(arm64.AMOVB)


@@ -203,7 +203,7 @@ func createDwarfVars(fnsym *obj.LSym, complexOK bool, fn *ir.Func, apDecls []*ir
continue
}
if n.Class != ir.PPARAMOUT || !n.IsOutputParamInRegisters() {
panic("invalid ir.Name on debugInfo.RegOutputParams list")
base.Fatalf("invalid ir.Name on debugInfo.RegOutputParams list")
}
dcl = append(dcl, n)
}
@@ -248,11 +248,6 @@ func createDwarfVars(fnsym *obj.LSym, complexOK bool, fn *ir.Func, apDecls []*ir
if n.Class == ir.PPARAM || n.Class == ir.PPARAMOUT {
tag = dwarf.DW_TAG_formal_parameter
}
if n.Esc() == ir.EscHeap {
// The variable in question has been promoted to the heap.
// Its address is in n.Heapaddr.
// TODO(thanm): generate a better location expression
}
inlIndex := 0
if base.Flag.GenDwarfInl > 1 {
if n.InlFormal() || n.InlLocal() {
@@ -263,7 +258,7 @@ func createDwarfVars(fnsym *obj.LSym, complexOK bool, fn *ir.Func, apDecls []*ir
}
}
declpos := base.Ctxt.InnermostPos(n.Pos())
vars = append(vars, &dwarf.Var{
dvar := &dwarf.Var{
Name: n.Sym().Name,
IsReturnValue: isReturnValue,
Tag: tag,
@@ -277,8 +272,19 @@ func createDwarfVars(fnsym *obj.LSym, complexOK bool, fn *ir.Func, apDecls []*ir
ChildIndex: -1,
DictIndex: n.DictIndex,
ClosureOffset: closureOffset(n, closureVars),
})
// Record go type of to insure that it gets emitted by the linker.
}
if n.Esc() == ir.EscHeap {
if n.Heapaddr == nil {
base.Fatalf("invalid heap allocated var without Heapaddr")
}
debug := fn.DebugInfo.(*ssa.FuncDebug)
list := createHeapDerefLocationList(n, debug.EntryID)
dvar.PutLocationList = func(listSym, startPC dwarf.Sym) {
debug.PutLocationList(list, base.Ctxt, listSym.(*obj.LSym), startPC.(*obj.LSym))
}
}
vars = append(vars, dvar)
// Record go type to ensure that it gets emitted by the linker.
fnsym.Func().RecordAutoType(reflectdata.TypeLinksym(n.Type()))
}
@@ -550,11 +556,34 @@ func createComplexVar(fnsym *obj.LSym, fn *ir.Func, varID ssa.VarID, closureVars
return dvar
}
// createHeapDerefLocationList creates a location list for a heap-escaped variable
// that describes "dereference pointer at stack offset"
func createHeapDerefLocationList(n *ir.Name, entryID ssa.ID) []byte {
// Get the stack offset where the heap pointer is stored
heapPtrOffset := n.Heapaddr.FrameOffset()
if base.Ctxt.Arch.FixedFrameSize == 0 {
heapPtrOffset -= int64(types.PtrSize)
}
if buildcfg.FramePointerEnabled {
heapPtrOffset -= int64(types.PtrSize)
}
// Create a location expression: DW_OP_fbreg <offset> DW_OP_deref
var locExpr []byte
var sizeIdx int
locExpr, sizeIdx = ssa.SetupLocList(base.Ctxt, entryID, locExpr, ssa.BlockStart.ID, ssa.FuncEnd.ID)
locExpr = append(locExpr, dwarf.DW_OP_fbreg)
locExpr = dwarf.AppendSleb128(locExpr, heapPtrOffset)
locExpr = append(locExpr, dwarf.DW_OP_deref)
base.Ctxt.Arch.ByteOrder.PutUint16(locExpr[sizeIdx:], uint16(len(locExpr)-sizeIdx-2))
return locExpr
}
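
For a concrete frame offset the expression body is tiny. With the heap
pointer slot at offset -24, the bytes emitted above are (opcode values
per the DWARF standard):

	// DW_OP_fbreg (0x91), SLEB128(-24) = 0x68, DW_OP_deref (0x06):
	// compute frame base - 24, load the pointer stored there, and use
	// the loaded address as the variable's location.
	var heapDerefExpr = []byte{0x91, 0x68, 0x06}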
// RecordFlags records the specified command-line flags to be placed
// in the DWARF info.
func RecordFlags(flags ...string) {
if base.Ctxt.Pkgpath == "" {
panic("missing pkgpath")
base.Fatalf("missing pkgpath")
}
type BoolFlag interface {


@@ -192,7 +192,7 @@ func (e *escape) call(ks []hole, call ir.Node) {
e.discard(call.X)
e.discard(call.Y)
case ir.ODELETE, ir.OPRINT, ir.OPRINTLN, ir.ORECOVERFP:
case ir.ODELETE, ir.OPRINT, ir.OPRINTLN, ir.ORECOVER:
call := call.(*ir.CallExpr)
for _, arg := range call.Args {
e.discard(arg)


@@ -122,17 +122,24 @@ type escape struct {
}
func Funcs(all []*ir.Func) {
ir.VisitFuncsBottomUp(all, Batch)
// Make a cache of ir.ReassignOracles. The cache is lazily populated.
// TODO(thepudds): consider adding a field on ir.Func instead. We might also be able
// to use that field elsewhere, like in walk. See discussion in https://go.dev/cl/688075.
reassignOracles := make(map[*ir.Func]*ir.ReassignOracle)
ir.VisitFuncsBottomUp(all, func(list []*ir.Func, recursive bool) {
Batch(list, reassignOracles)
})
}
// Batch performs escape analysis on a minimal batch of
// functions.
func Batch(fns []*ir.Func, recursive bool) {
func Batch(fns []*ir.Func, reassignOracles map[*ir.Func]*ir.ReassignOracle) {
var b batch
b.heapLoc.attrs = attrEscapes | attrPersists | attrMutates | attrCalls
b.mutatorLoc.attrs = attrMutates
b.calleeLoc.attrs = attrCalls
b.reassignOracles = make(map[*ir.Func]*ir.ReassignOracle)
b.reassignOracles = reassignOracles
// Construct data-flow graph from syntax trees.
for _, fn := range fns {
@@ -531,19 +538,9 @@ func (b *batch) rewriteWithLiterals(n ir.Node, fn *ir.Func) {
if n == nil || fn == nil {
return
}
if n.Op() != ir.OMAKESLICE && n.Op() != ir.OCONVIFACE {
return
}
// Look up a cached ReassignOracle for the function, lazily computing one if needed.
ro := b.reassignOracle(fn)
if ro == nil {
base.Fatalf("no ReassignOracle for function %v with closure parent %v", fn, fn.ClosureParent)
}
assignTemp := func(n ir.Node, init *ir.Nodes) {
assignTemp := func(pos src.XPos, n ir.Node, init *ir.Nodes) {
// Preserve any side effects of n by assigning it to an otherwise unused temp.
pos := n.Pos()
tmp := typecheck.TempAt(pos, fn, n.Type())
init.Append(typecheck.Stmt(ir.NewDecl(pos, ir.ODCL, tmp)))
init.Append(typecheck.Stmt(ir.NewAssignStmt(pos, tmp, n)))
@@ -561,6 +558,11 @@ func (b *batch) rewriteWithLiterals(n ir.Node, fn *ir.Func) {
}
if (*r).Op() != ir.OLITERAL {
// Look up a cached ReassignOracle for the function, lazily computing one if needed.
ro := b.reassignOracle(fn)
if ro == nil {
base.Fatalf("no ReassignOracle for function %v with closure parent %v", fn, fn.ClosureParent)
}
if s := ro.StaticValue(*r); s.Op() == ir.OLITERAL {
lit, ok := s.(*ir.BasicLit)
if !ok || lit.Val().Kind() != constant.Int {
@@ -572,8 +574,8 @@ func (b *batch) rewriteWithLiterals(n ir.Node, fn *ir.Func) {
return
}
// Preserve any side effects of the original expression, then replace it.
assignTemp(*r, n.PtrInit())
*r = lit
assignTemp(n.Pos(), *r, n.PtrInit())
*r = ir.NewBasicLit(n.Pos(), (*r).Type(), lit.Val())
}
}
}
@@ -582,6 +584,12 @@ func (b *batch) rewriteWithLiterals(n ir.Node, fn *ir.Func) {
// a literal to avoid heap allocating the underlying interface value.
conv := n.(*ir.ConvExpr)
if conv.X.Op() != ir.OLITERAL && !conv.X.Type().IsInterface() {
// TODO(thepudds): likely could avoid some work by tightening the check of conv.X's type.
// Look up a cached ReassignOracle for the function, lazily computing one if needed.
ro := b.reassignOracle(fn)
if ro == nil {
base.Fatalf("no ReassignOracle for function %v with closure parent %v", fn, fn.ClosureParent)
}
v := ro.StaticValue(conv.X)
if v != nil && v.Op() == ir.OLITERAL && ir.ValidTypeForConst(conv.X.Type(), v.Val()) {
if !base.LiteralAllocHash.MatchPos(n.Pos(), nil) {
@@ -592,9 +600,9 @@ func (b *batch) rewriteWithLiterals(n ir.Node, fn *ir.Func) {
base.WarnfAt(n.Pos(), "rewriting OCONVIFACE value from %v (%v) to %v (%v)", conv.X, conv.X.Type(), v, v.Type())
}
// Preserve any side effects of the original expression, then replace it.
assignTemp(conv.X, conv.PtrInit())
assignTemp(conv.Pos(), conv.X, conv.PtrInit())
v := v.(*ir.BasicLit)
conv.X = ir.NewBasicLit(conv.X.Pos(), conv.X.Type(), v.Val())
conv.X = ir.NewBasicLit(conv.Pos(), conv.X.Type(), v.Val())
typecheck.Expr(conv)
}
}
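
In source terms the OCONVIFACE rewrite amounts to the following
(conceptual sketch only; the compiler performs it on the IR):

	func conv() any {
		c := 7 // never reassigned, so ReassignOracle.StaticValue sees 7
		// The conversion operand is replaced by the literal, letting the
		// compiler back the interface with a static value instead of a
		// heap allocation; any side effects go to a blank temporary.
		return c // effectively: return 7
	}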


@@ -139,7 +139,7 @@ func (e *escape) exprSkipInit(k hole, n ir.Node) {
e.discard(n.X)
case ir.OCALLMETH, ir.OCALLFUNC, ir.OCALLINTER, ir.OINLCALL,
ir.OLEN, ir.OCAP, ir.OMIN, ir.OMAX, ir.OCOMPLEX, ir.OREAL, ir.OIMAG, ir.OAPPEND, ir.OCOPY, ir.ORECOVERFP,
ir.OLEN, ir.OCAP, ir.OMIN, ir.OMAX, ir.OCOMPLEX, ir.OREAL, ir.OIMAG, ir.OAPPEND, ir.OCOPY, ir.ORECOVER,
ir.OUNSAFEADD, ir.OUNSAFESLICE, ir.OUNSAFESTRING, ir.OUNSAFESTRINGDATA, ir.OUNSAFESLICEDATA:
e.call([]hole{k}, n)


@@ -183,7 +183,7 @@ func (e *escape) stmt(n ir.Node) {
dsts[i] = res.Nname.(*ir.Name)
}
e.assignList(dsts, n.Results, "return", n)
case ir.OCALLFUNC, ir.OCALLMETH, ir.OCALLINTER, ir.OINLCALL, ir.OCLEAR, ir.OCLOSE, ir.OCOPY, ir.ODELETE, ir.OPANIC, ir.OPRINT, ir.OPRINTLN, ir.ORECOVERFP:
case ir.OCALLFUNC, ir.OCALLMETH, ir.OCALLINTER, ir.OINLCALL, ir.OCLEAR, ir.OCLOSE, ir.OCOPY, ir.ODELETE, ir.OPANIC, ir.OPRINT, ir.OPRINTLN, ir.ORECOVER:
e.call(nil, n)
case ir.OGO, ir.ODEFER:
n := n.(*ir.GoDeferStmt)


@@ -104,12 +104,10 @@ func Main(archInit func(*ssagen.ArchInfo)) {
ir.Pkgs.Runtime = types.NewPkg("go.runtime", "runtime")
ir.Pkgs.Runtime.Prefix = "runtime"
if buildcfg.Experiment.SwissMap {
// Pseudo-package that contains the compiler's builtin
// declarations for maps.
ir.Pkgs.InternalMaps = types.NewPkg("go.internal/runtime/maps", "internal/runtime/maps")
ir.Pkgs.InternalMaps.Prefix = "internal/runtime/maps"
}
// Pseudo-package that contains the compiler's builtin
// declarations for maps.
ir.Pkgs.InternalMaps = types.NewPkg("go.internal/runtime/maps", "internal/runtime/maps")
ir.Pkgs.InternalMaps.Prefix = "internal/runtime/maps"
// pseudo-packages used in symbol tables
ir.Pkgs.Itab = types.NewPkg("go.itab", "go.itab")


@@ -673,3 +673,50 @@ type S struct {
}
wg.Wait()
}
func TestIssue63285(t *testing.T) {
testenv.MustHaveGoBuild(t)
// This package only handles gc export data.
if runtime.Compiler != "gc" {
t.Skipf("gc-built packages not available (compiler = %s)", runtime.Compiler)
}
tmpdir := t.TempDir()
testoutdir := filepath.Join(tmpdir, "testdata")
if err := os.Mkdir(testoutdir, 0700); err != nil {
t.Fatalf("making output dir: %v", err)
}
compile(t, "testdata", "issue63285.go", testoutdir, nil)
issue63285, err := Import(make(map[string]*types2.Package), "./testdata/issue63285", tmpdir, nil)
if err != nil {
t.Fatal(err)
}
check := func(pkgname, src string, imports importMap) (*types2.Package, error) {
f, err := syntax.Parse(syntax.NewFileBase(pkgname), strings.NewReader(src), nil, nil, 0)
if err != nil {
return nil, err
}
config := &types2.Config{
Importer: imports,
}
return config.Check(pkgname, []*syntax.File{f}, nil)
}
const pSrc = `package p
import "issue63285"
var _ issue63285.A[issue63285.B[any]]
`
importer := importMap{
"issue63285": issue63285,
}
if _, err := check("p", pSrc, importer); err != nil {
t.Errorf("Check failed: %v", err)
}
}


@@ -0,0 +1,11 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package issue63285
type A[_ B[any]] struct{}
type B[_ any] interface {
f() A[B[any]]
}


@@ -67,7 +67,8 @@ type reader struct {
p *pkgReader
dict *readerDict
dict *readerDict
delayed []func()
}
type readerDict struct {
@@ -420,7 +421,7 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types2.Package, string) {
pos := r.pos()
var tparams []*types2.TypeParam
if r.Version().Has(pkgbits.AliasTypeParamNames) {
tparams = r.typeParamNames()
tparams = r.typeParamNames(false)
}
typ := r.typ()
return newAliasTypeName(pr.enableAlias, pos, objPkg, objName, typ, tparams)
@@ -433,28 +434,28 @@ func (pr *pkgReader) objIdx(idx pkgbits.Index) (*types2.Package, string) {
case pkgbits.ObjFunc:
pos := r.pos()
tparams := r.typeParamNames()
tparams := r.typeParamNames(false)
sig := r.signature(nil, nil, tparams)
return types2.NewFunc(pos, objPkg, objName, sig)
case pkgbits.ObjType:
pos := r.pos()
return types2.NewTypeNameLazy(pos, objPkg, objName, func(named *types2.Named) (tparams []*types2.TypeParam, underlying types2.Type, methods []*types2.Func) {
tparams = r.typeParamNames()
return types2.NewTypeNameLazy(pos, objPkg, objName, func(_ *types2.Named) ([]*types2.TypeParam, types2.Type, []*types2.Func, []func()) {
tparams := r.typeParamNames(true)
// TODO(mdempsky): Rewrite receiver types to underlying is an
// Interface? The go/types importer does this (I think because
// unit tests expected that), but cmd/compile doesn't care
// about it, so maybe we can avoid worrying about that here.
underlying = r.typ().Underlying()
underlying := r.typ().Underlying()
methods = make([]*types2.Func, r.Len())
methods := make([]*types2.Func, r.Len())
for i := range methods {
methods[i] = r.method()
methods[i] = r.method(true)
}
return
return tparams, underlying, methods, r.delayed
})
case pkgbits.ObjVar:
@@ -497,7 +498,7 @@ func (pr *pkgReader) objDictIdx(idx pkgbits.Index) *readerDict {
return &dict
}
func (r *reader) typeParamNames() []*types2.TypeParam {
func (r *reader) typeParamNames(isLazy bool) []*types2.TypeParam {
r.Sync(pkgbits.SyncTypeParamNames)
// Note: This code assumes it only processes objects without
@@ -523,19 +524,38 @@ func (r *reader) typeParamNames() []*types2.TypeParam {
r.dict.tparams[i] = types2.NewTypeParam(tname, nil)
}
for i, bound := range r.dict.bounds {
r.dict.tparams[i].SetConstraint(r.p.typIdx(bound, r.dict))
// Type parameters that are read by lazy loaders cannot have their
// constraints set eagerly; do them after loading (go.dev/issue/63285).
if isLazy {
// The reader dictionary will continue mutating before we have time
// to call delayed functions; must make a local copy of both the type
// parameters and their (unexpanded) constraints.
bounds := make([]types2.Type, len(r.dict.bounds))
for i, bound := range r.dict.bounds {
bounds[i] = r.p.typIdx(bound, r.dict)
}
tparams := r.dict.tparams
r.delayed = append(r.delayed, func() {
for i, bound := range bounds {
tparams[i].SetConstraint(bound)
}
})
} else {
for i, bound := range r.dict.bounds {
r.dict.tparams[i].SetConstraint(r.p.typIdx(bound, r.dict))
}
}
return r.dict.tparams
}
func (r *reader) method() *types2.Func {
func (r *reader) method(isLazy bool) *types2.Func {
r.Sync(pkgbits.SyncMethod)
pos := r.pos()
pkg, name := r.selector()
rtparams := r.typeParamNames()
rtparams := r.typeParamNames(isLazy)
sig := r.signature(r.param(), rtparams, nil)
_ = r.pos() // TODO(mdempsky): Remove; this is a hack for linker.go.
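
The heart of the fix is the new delayed slice: constraint setting for lazily-loaded objects is captured in closures over local copies of the dict state and replayed after the enclosing type is complete. A minimal sketch of the pattern, with illustrative names rather than the real reader API:

    package main

    import "fmt"

    type tparam struct{ constraint string }

    type loader struct {
        delayed []func()
    }

    func (l *loader) readTypeParams(lazy bool) []*tparam {
        tps := []*tparam{{}, {}}
        bounds := []string{"B[any]", "any"} // snapshot; the real dict keeps mutating
        if lazy {
            l.delayed = append(l.delayed, func() {
                for i, b := range bounds {
                    tps[i].constraint = b
                }
            })
        } else {
            for i, b := range bounds {
                tps[i].constraint = b
            }
        }
        return tps
    }

    func main() {
        var l loader
        tps := l.readTypeParams(true)
        for _, f := range l.delayed { // replayed once the type is otherwise set up
            f()
        }
        fmt.Println(tps[0].constraint, tps[1].constraint) // B[any] any
    }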

View file

@ -605,10 +605,7 @@ opSwitch:
v.budget -= inlineExtraPanicCost
case ir.ORECOVER:
base.FatalfAt(n.Pos(), "ORECOVER missed typecheck")
case ir.ORECOVERFP:
// recover matches the argument frame pointer to find
// the right panic value, so it needs an argument frame.
// TODO: maybe we could allow inlining of recover() now?
v.reason = "call to recover"
return true

View file

@ -335,7 +335,7 @@ func (ffa *funcFlagsAnalyzer) nodeVisitPost(n ir.Node) {
ir.OPRINTLN, ir.OPRINT, ir.OLABEL, ir.OCALLINTER, ir.ODEFER,
ir.OSEND, ir.ORECV, ir.OSELRECV2, ir.OGO, ir.OAPPEND, ir.OAS2DOTTYPE,
ir.OAS2MAPR, ir.OGETG, ir.ODELETE, ir.OINLMARK, ir.OAS2RECV,
ir.OMIN, ir.OMAX, ir.OMAKE, ir.ORECOVERFP, ir.OGETCALLERSP:
ir.OMIN, ir.OMAX, ir.OMAKE, ir.OGETCALLERSP:
// these should all be benign/uninteresting
case ir.OTAILCALL, ir.OJUMPTABLE, ir.OTYPESW:
// don't expect to see these at all.

View file

@ -213,7 +213,7 @@ func (n *CallExpr) SetOp(op Op) {
ODELETE,
OGETG, OGETCALLERSP,
OMAKE, OMAX, OMIN, OPRINT, OPRINTLN,
ORECOVER, ORECOVERFP:
ORECOVER:
n.op = op
}
}
@ -912,12 +912,12 @@ FindRHS:
break FindRHS
}
}
base.Fatalf("%v missing from LHS of %v", n, defn)
base.FatalfAt(defn.Pos(), "%v missing from LHS of %v", n, defn)
default:
return nil
}
if rhs == nil {
base.Fatalf("RHS is nil: %v", defn)
base.FatalfAt(defn.Pos(), "RHS is nil: %v", defn)
}
if Reassigned(n) {

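Fatalf reports an ICE at base.Pos, wherever the compiler currently happens to be; FatalfAt pins the report to an explicit position, here the defining statement, which is far more useful when the walker is nowhere near defn. A runnable analog of the difference (the real signature is base.FatalfAt(pos src.XPos, format string, args ...any), and base.Fatalf delegates to it with base.Pos):

    package main

    import "fmt"

    type pos struct{ line int }

    var cur = pos{line: 913} // wherever the compiler happens to be

    func fatalf(format string, args ...any) {
        fatalfAt(cur, format, args...)
    }

    func fatalfAt(p pos, format string, args ...any) {
        fmt.Printf("line %d: %s\n", p.line, fmt.Sprintf(format, args...))
    }

    func main() {
        defnPos := pos{line: 42}        // position of the defining statement
        fatalf("RHS is nil")            // line 913: RHS is nil
        fatalfAt(defnPos, "RHS is nil") // line 42: RHS is nil
    }
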
View file

@ -234,7 +234,6 @@ const (
OSLICEHEADER // sliceheader{Ptr, Len, Cap} (Ptr is unsafe.Pointer, Len is length, Cap is capacity)
OSTRINGHEADER // stringheader{Ptr, Len} (Ptr is unsafe.Pointer, Len is length)
ORECOVER // recover()
ORECOVERFP // recover(Args) w/ explicit FP argument
ORECV // <-X
ORUNESTR // Type(X) (Type is string, X is rune)
OSELRECV2 // like OAS2: Lhs = Rhs where len(Lhs)=2, len(Rhs)=1, Rhs[0].Op = ORECV (appears as .Var of OCASE)

View file

@ -108,62 +108,61 @@ func _() {
_ = x[OSLICEHEADER-97]
_ = x[OSTRINGHEADER-98]
_ = x[ORECOVER-99]
_ = x[ORECOVERFP-100]
_ = x[ORECV-101]
_ = x[ORUNESTR-102]
_ = x[OSELRECV2-103]
_ = x[OMIN-104]
_ = x[OMAX-105]
_ = x[OREAL-106]
_ = x[OIMAG-107]
_ = x[OCOMPLEX-108]
_ = x[OUNSAFEADD-109]
_ = x[OUNSAFESLICE-110]
_ = x[OUNSAFESLICEDATA-111]
_ = x[OUNSAFESTRING-112]
_ = x[OUNSAFESTRINGDATA-113]
_ = x[OMETHEXPR-114]
_ = x[OMETHVALUE-115]
_ = x[OBLOCK-116]
_ = x[OBREAK-117]
_ = x[OCASE-118]
_ = x[OCONTINUE-119]
_ = x[ODEFER-120]
_ = x[OFALL-121]
_ = x[OFOR-122]
_ = x[OGOTO-123]
_ = x[OIF-124]
_ = x[OLABEL-125]
_ = x[OGO-126]
_ = x[ORANGE-127]
_ = x[ORETURN-128]
_ = x[OSELECT-129]
_ = x[OSWITCH-130]
_ = x[OTYPESW-131]
_ = x[OINLCALL-132]
_ = x[OMAKEFACE-133]
_ = x[OITAB-134]
_ = x[OIDATA-135]
_ = x[OSPTR-136]
_ = x[OCFUNC-137]
_ = x[OCHECKNIL-138]
_ = x[ORESULT-139]
_ = x[OINLMARK-140]
_ = x[OLINKSYMOFFSET-141]
_ = x[OJUMPTABLE-142]
_ = x[OINTERFACESWITCH-143]
_ = x[ODYNAMICDOTTYPE-144]
_ = x[ODYNAMICDOTTYPE2-145]
_ = x[ODYNAMICTYPE-146]
_ = x[OTAILCALL-147]
_ = x[OGETG-148]
_ = x[OGETCALLERSP-149]
_ = x[OEND-150]
_ = x[ORECV-100]
_ = x[ORUNESTR-101]
_ = x[OSELRECV2-102]
_ = x[OMIN-103]
_ = x[OMAX-104]
_ = x[OREAL-105]
_ = x[OIMAG-106]
_ = x[OCOMPLEX-107]
_ = x[OUNSAFEADD-108]
_ = x[OUNSAFESLICE-109]
_ = x[OUNSAFESLICEDATA-110]
_ = x[OUNSAFESTRING-111]
_ = x[OUNSAFESTRINGDATA-112]
_ = x[OMETHEXPR-113]
_ = x[OMETHVALUE-114]
_ = x[OBLOCK-115]
_ = x[OBREAK-116]
_ = x[OCASE-117]
_ = x[OCONTINUE-118]
_ = x[ODEFER-119]
_ = x[OFALL-120]
_ = x[OFOR-121]
_ = x[OGOTO-122]
_ = x[OIF-123]
_ = x[OLABEL-124]
_ = x[OGO-125]
_ = x[ORANGE-126]
_ = x[ORETURN-127]
_ = x[OSELECT-128]
_ = x[OSWITCH-129]
_ = x[OTYPESW-130]
_ = x[OINLCALL-131]
_ = x[OMAKEFACE-132]
_ = x[OITAB-133]
_ = x[OIDATA-134]
_ = x[OSPTR-135]
_ = x[OCFUNC-136]
_ = x[OCHECKNIL-137]
_ = x[ORESULT-138]
_ = x[OINLMARK-139]
_ = x[OLINKSYMOFFSET-140]
_ = x[OJUMPTABLE-141]
_ = x[OINTERFACESWITCH-142]
_ = x[ODYNAMICDOTTYPE-143]
_ = x[ODYNAMICDOTTYPE2-144]
_ = x[ODYNAMICTYPE-145]
_ = x[OTAILCALL-146]
_ = x[OGETG-147]
_ = x[OGETCALLERSP-148]
_ = x[OEND-149]
}
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND"
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLEARCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTLNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERSTRINGHEADERRECOVERRECVRUNESTRSELRECV2MINMAXREALIMAGCOMPLEXUNSAFEADDUNSAFESLICEUNSAFESLICEDATAUNSAFESTRINGUNSAFESTRINGDATAMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWINLCALLMAKEFACEITABIDATASPTRCFUNCCHECKNILRESULTINLMARKLINKSYMOFFSETJUMPTABLEINTERFACESWITCHDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERSPEND"
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 591, 595, 602, 610, 613, 616, 620, 624, 631, 640, 651, 666, 678, 694, 702, 711, 716, 721, 725, 733, 738, 742, 745, 749, 751, 756, 758, 763, 769, 775, 781, 787, 794, 802, 806, 811, 815, 820, 828, 834, 841, 854, 863, 878, 892, 907, 918, 926, 930, 941, 944}
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 129, 141, 143, 146, 156, 163, 170, 177, 181, 185, 193, 201, 210, 213, 218, 223, 230, 237, 243, 252, 260, 268, 274, 278, 287, 294, 298, 301, 308, 314, 317, 323, 330, 338, 342, 349, 357, 359, 361, 363, 365, 367, 369, 374, 379, 387, 390, 399, 402, 406, 414, 421, 430, 443, 446, 449, 452, 455, 458, 461, 467, 470, 473, 479, 483, 486, 490, 495, 500, 507, 512, 516, 521, 529, 537, 543, 552, 563, 575, 582, 586, 593, 601, 604, 607, 611, 615, 622, 631, 642, 657, 669, 685, 693, 702, 707, 712, 716, 724, 729, 733, 736, 740, 742, 747, 749, 754, 760, 766, 772, 778, 785, 793, 797, 802, 806, 811, 819, 825, 832, 845, 854, 869, 883, 898, 909, 917, 921, 932, 935}
func (i Op) String() string {
if i >= Op(len(_Op_index)-1) {

View file

@ -178,12 +178,12 @@ FindRHS:
break FindRHS
}
}
base.Fatalf("%v missing from LHS of %v", n, defn)
base.FatalfAt(defn.Pos(), "%v missing from LHS of %v", n, defn)
default:
return nil
}
if rhs == nil {
base.Fatalf("RHS is nil: %v", defn)
base.FatalfAt(defn.Pos(), "RHS is nil: %v", defn)
}
if _, ok := ro.singleDef[n]; !ok {

View file

@ -37,6 +37,8 @@ type symsStruct struct {
Msanmove *obj.LSym
Newobject *obj.LSym
Newproc *obj.LSym
PanicBounds *obj.LSym
PanicExtend *obj.LSym
Panicdivide *obj.LSym
Panicshift *obj.LSym
PanicdottypeE *obj.LSym

View file

@ -769,7 +769,7 @@ func (lv *Liveness) epilogue() {
// its stack copy is not live.
continue
}
// Note: zeroing is handled by zeroResults in walk.go.
// Note: zeroing is handled by zeroResults in ../ssagen/ssa.go.
livedefer.Set(int32(i))
}
if n.IsOutputParamHeapAddr() {

View file

@ -16,6 +16,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/loong64"
"internal/abi"
)
// isFPreg reports whether r is an FP register.
@ -663,12 +664,92 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.From.Type = obj.TYPE_CONST
p.From.Offset = 0x1A
case ssa.OpLOONG64LoweredPanicBoundsA, ssa.OpLOONG64LoweredPanicBoundsB, ssa.OpLOONG64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpLOONG64LoweredPanicBoundsRR, ssa.OpLOONG64LoweredPanicBoundsRC, ssa.OpLOONG64LoweredPanicBoundsCR, ssa.OpLOONG64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpLOONG64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - loong64.REG_R4)
yIsReg = true
yVal = int(v.Args[1].Reg() - loong64.REG_R4)
case ssa.OpLOONG64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - loong64.REG_R4)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_R4 + int16(yVal)
}
case ssa.OpLOONG64LoweredPanicBoundsCR:
yIsReg = true
yVal = int(v.Args[0].Reg() - loong64.REG_R4)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_R4 + int16(xVal)
}
case ssa.OpLOONG64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_R4 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(loong64.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = loong64.REG_R4 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpLOONG64LoweredAtomicLoad8, ssa.OpLOONG64LoweredAtomicLoad32, ssa.OpLOONG64LoweredAtomicLoad64:
// MOVB (Rarg0), Rout
// DBAR 0x14

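One subtlety in the CR cases above: the register index must be assigned with =, not :=. A := inside the case clause would declare a fresh variable, leaving the outer one that feeds abi.BoundsEncode at zero, so the PCDATA entry would silently name the wrong register. The pitfall in isolation:

    package main

    import "fmt"

    func main() {
        yVal := 0
        switch {
        case true:
            yVal := 42 // ":=" declares a new yVal scoped to this case
            _ = yVal
        }
        fmt.Println(yVal) // prints 0: the outer yVal was never updated
    }
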
View file

@ -15,6 +15,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/mips"
"internal/abi"
)
// isFPreg reports whether r is an FP register.
@ -486,18 +487,167 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_EXTERN
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpMIPSLoweredPanicBoundsA, ssa.OpMIPSLoweredPanicBoundsB, ssa.OpMIPSLoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpMIPSLoweredPanicBoundsRR, ssa.OpMIPSLoweredPanicBoundsRC, ssa.OpMIPSLoweredPanicBoundsCR, ssa.OpMIPSLoweredPanicBoundsCC,
ssa.OpMIPSLoweredPanicExtendRR, ssa.OpMIPSLoweredPanicExtendRC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
extend := false
switch v.Op {
case ssa.OpMIPSLoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - mips.REG_R1)
yIsReg = true
yVal = int(v.Args[1].Reg() - mips.REG_R1)
case ssa.OpMIPSLoweredPanicExtendRR:
extend = true
xIsReg = true
hi := int(v.Args[0].Reg() - mips.REG_R1)
lo := int(v.Args[1].Reg() - mips.REG_R1)
xVal = hi<<2 + lo // encode 2 register numbers
yIsReg = true
yVal = int(v.Args[2].Reg() - mips.REG_R1)
case ssa.OpMIPSLoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - mips.REG_R1)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(yVal)
}
case ssa.OpMIPSLoweredPanicExtendRC:
extend = true
xIsReg = true
hi := int(v.Args[0].Reg() - mips.REG_R1)
lo := int(v.Args[1].Reg() - mips.REG_R1)
xVal = hi<<2 + lo // encode 2 register numbers
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
for yVal == hi || yVal == lo {
yVal++
}
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(yVal)
}
case ssa.OpMIPSLoweredPanicBoundsCR:
yIsReg = true
yVal = int(v.Args[0].Reg() - mips.REG_R1)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(xVal)
} else {
// Move constant to two registers
extend = true
xIsReg = true
hi := 0
lo := 1
if hi == yVal {
hi = 2
}
if lo == yVal {
lo = 2
}
xVal = hi<<2 + lo
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c >> 32
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(hi)
p = s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(int32(c))
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(lo)
}
case ssa.OpMIPSLoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else if signed && int64(int32(c)) == c || !signed && int64(uint32(c)) == c {
// Move constant to a register
xIsReg = true
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(xVal)
} else {
// Move constant to two registers
extend = true
xIsReg = true
hi := 0
lo := 1
xVal = hi<<2 + lo
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c >> 32
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(hi)
p = s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(int32(c))
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(lo)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 2
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(8) // space used in callee args area by assembly stubs
case ssa.OpMIPSLoweredPanicExtendA, ssa.OpMIPSLoweredPanicExtendB, ssa.OpMIPSLoweredPanicExtendC:
p := s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.ExtendCheckFunc[v.AuxInt]
s.UseArgs(12) // space used in callee args area by assembly stubs
if extend {
p.To.Sym = ir.Syms.PanicExtend
} else {
p.To.Sym = ir.Syms.PanicBounds
}
case ssa.OpMIPSLoweredAtomicLoad8,
ssa.OpMIPSLoweredAtomicLoad32:
s.Prog(mips.ASYNC)

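On 32-bit MIPS a 64-bit index occupies two registers, so the x slot of the encoding packs both indices as hi<<2 + lo. This assumes lo fits in two bits, which the op's register constraints are presumably arranged to guarantee. The packing in isolation:

    package main

    import "fmt"

    func main() {
        hi, lo := 2, 3     // register indices relative to R1
        xVal := hi<<2 + lo // pack: hi in the upper bits, lo in the low two
        fmt.Println(xVal>>2, xVal&3) // unpack: 2 3
    }
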
View file

@ -15,6 +15,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/mips"
"internal/abi"
)
// isFPreg reports whether r is an FP register.
@ -507,12 +508,93 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_EXTERN
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpMIPS64LoweredPanicBoundsA, ssa.OpMIPS64LoweredPanicBoundsB, ssa.OpMIPS64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpMIPS64LoweredPanicBoundsRR, ssa.OpMIPS64LoweredPanicBoundsRC, ssa.OpMIPS64LoweredPanicBoundsCR, ssa.OpMIPS64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpMIPS64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - mips.REG_R1)
yIsReg = true
yVal = int(v.Args[1].Reg() - mips.REG_R1)
case ssa.OpMIPS64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - mips.REG_R1)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(mips.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(yVal)
}
case ssa.OpMIPS64LoweredPanicBoundsCR:
yIsReg = true
yVal = int(v.Args[0].Reg() - mips.REG_R1)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(mips.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(xVal)
}
case ssa.OpMIPS64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(mips.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(mips.AMOVV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = mips.REG_R1 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpMIPS64LoweredAtomicLoad8, ssa.OpMIPS64LoweredAtomicLoad32, ssa.OpMIPS64LoweredAtomicLoad64:
as := mips.AMOVV
switch v.Op {

View file

@ -15,10 +15,10 @@ import (
"internal/abi"
)
// SwissMapGroupType makes the map slot group type given the type of the map.
func SwissMapGroupType(t *types.Type) *types.Type {
if t.MapType().SwissGroup != nil {
return t.MapType().SwissGroup
// MapGroupType makes the map slot group type given the type of the map.
func MapGroupType(t *types.Type) *types.Type {
if t.MapType().Group != nil {
return t.MapType().Group
}
// Builds a type representing a group structure for the given map type.
@ -29,7 +29,7 @@ func SwissMapGroupType(t *types.Type) *types.Type {
//
// type group struct {
// ctrl uint64
// slots [abi.SwissMapGroupSlots]struct {
// slots [abi.MapGroupSlots]struct {
// key keyType
// elem elemType
// }
@ -39,10 +39,10 @@ func SwissMapGroupType(t *types.Type) *types.Type {
elemtype := t.Elem()
types.CalcSize(keytype)
types.CalcSize(elemtype)
if keytype.Size() > abi.SwissMapMaxKeyBytes {
if keytype.Size() > abi.MapMaxKeyBytes {
keytype = types.NewPtr(keytype)
}
if elemtype.Size() > abi.SwissMapMaxElemBytes {
if elemtype.Size() > abi.MapMaxElemBytes {
elemtype = types.NewPtr(elemtype)
}
@ -53,7 +53,7 @@ func SwissMapGroupType(t *types.Type) *types.Type {
slot := types.NewStruct(slotFields)
slot.SetNoalg(true)
slotArr := types.NewArray(slot, abi.SwissMapGroupSlots)
slotArr := types.NewArray(slot, abi.MapGroupSlots)
slotArr.SetNoalg(true)
fields := []*types.Field{
@ -76,25 +76,25 @@ func SwissMapGroupType(t *types.Type) *types.Type {
// the end to ensure pointers are valid.
base.Fatalf("bad group size for %v", t)
}
if t.Key().Size() > abi.SwissMapMaxKeyBytes && !keytype.IsPtr() {
if t.Key().Size() > abi.MapMaxKeyBytes && !keytype.IsPtr() {
base.Fatalf("key indirect incorrect for %v", t)
}
if t.Elem().Size() > abi.SwissMapMaxElemBytes && !elemtype.IsPtr() {
if t.Elem().Size() > abi.MapMaxElemBytes && !elemtype.IsPtr() {
base.Fatalf("elem indirect incorrect for %v", t)
}
t.MapType().SwissGroup = group
t.MapType().Group = group
group.StructType().Map = t
return group
}
var cachedSwissTableType *types.Type
var cachedMapTableType *types.Type
// swissTableType returns a type interchangeable with internal/runtime/maps.table.
// mapTableType returns a type interchangeable with internal/runtime/maps.table.
// Make sure this stays in sync with internal/runtime/maps/table.go.
func swissTableType() *types.Type {
if cachedSwissTableType != nil {
return cachedSwissTableType
func mapTableType() *types.Type {
if cachedMapTableType != nil {
return cachedMapTableType
}
// type table struct {
@ -135,17 +135,17 @@ func swissTableType() *types.Type {
base.Fatalf("internal/runtime/maps.table size not correct: got %d, want %d", table.Size(), size)
}
cachedSwissTableType = table
cachedMapTableType = table
return table
}
var cachedSwissMapType *types.Type
var cachedMapType *types.Type
// SwissMapType returns a type interchangeable with internal/runtime/maps.Map.
// MapType returns a type interchangeable with internal/runtime/maps.Map.
// Make sure this stays in sync with internal/runtime/maps/map.go.
func SwissMapType() *types.Type {
if cachedSwissMapType != nil {
return cachedSwissMapType
func MapType() *types.Type {
if cachedMapType != nil {
return cachedMapType
}
// type Map struct {
@ -191,23 +191,23 @@ func SwissMapType() *types.Type {
base.Fatalf("internal/runtime/maps.Map size not correct: got %d, want %d", m.Size(), size)
}
cachedSwissMapType = m
cachedMapType = m
return m
}
var cachedSwissIterType *types.Type
var cachedMapIterType *types.Type
// SwissMapIterType returns a type interchangeable with runtime.hiter.
// Make sure this stays in sync with runtime/map.go.
func SwissMapIterType() *types.Type {
if cachedSwissIterType != nil {
return cachedSwissIterType
// MapIterType returns a type interchangeable with internal/runtime/maps.Iter.
// Make sure this stays in sync with internal/runtime/maps/table.go.
func MapIterType() *types.Type {
if cachedMapIterType != nil {
return cachedMapIterType
}
// type Iter struct {
// key unsafe.Pointer // *Key
// elem unsafe.Pointer // *Elem
// typ unsafe.Pointer // *SwissMapType
// typ unsafe.Pointer // *MapType
// m *Map
//
// groupSlotOffset uint64
@ -231,13 +231,13 @@ func SwissMapIterType() *types.Type {
makefield("key", types.Types[types.TUNSAFEPTR]), // Used in range.go for TMAP.
makefield("elem", types.Types[types.TUNSAFEPTR]), // Used in range.go for TMAP.
makefield("typ", types.Types[types.TUNSAFEPTR]),
makefield("m", types.NewPtr(SwissMapType())),
makefield("m", types.NewPtr(MapType())),
makefield("groupSlotOffset", types.Types[types.TUINT64]),
makefield("dirOffset", types.Types[types.TUINT64]),
makefield("clearSeq", types.Types[types.TUINT64]),
makefield("globalDepth", types.Types[types.TUINT8]),
makefield("dirIdx", types.Types[types.TINT]),
makefield("tab", types.NewPtr(swissTableType())),
makefield("tab", types.NewPtr(mapTableType())),
makefield("group", types.Types[types.TUNSAFEPTR]),
makefield("entryIdx", types.Types[types.TUINT64]),
}
@ -257,13 +257,13 @@ func SwissMapIterType() *types.Type {
base.Fatalf("internal/runtime/maps.Iter size not correct: got %d, want %d", iter.Size(), size)
}
cachedSwissIterType = iter
cachedMapIterType = iter
return iter
}
func writeSwissMapType(t *types.Type, lsym *obj.LSym, c rttype.Cursor) {
// internal/abi.SwissMapType
gtyp := SwissMapGroupType(t)
func writeMapType(t *types.Type, lsym *obj.LSym, c rttype.Cursor) {
// internal/abi.MapType
gtyp := MapGroupType(t)
s1 := writeType(t.Key())
s2 := writeType(t.Elem())
s3 := writeType(gtyp)
@ -287,16 +287,16 @@ func writeSwissMapType(t *types.Type, lsym *obj.LSym, c rttype.Cursor) {
c.Field("ElemOff").WriteUintptr(uint64(elemOff))
var flags uint32
if needkeyupdate(t.Key()) {
flags |= abi.SwissMapNeedKeyUpdate
flags |= abi.MapNeedKeyUpdate
}
if hashMightPanic(t.Key()) {
flags |= abi.SwissMapHashMightPanic
flags |= abi.MapHashMightPanic
}
if t.Key().Size() > abi.SwissMapMaxKeyBytes {
flags |= abi.SwissMapIndirectKey
if t.Key().Size() > abi.MapMaxKeyBytes {
flags |= abi.MapIndirectKey
}
if t.Elem().Size() > abi.SwissMapMaxKeyBytes {
flags |= abi.SwissMapIndirectElem
if t.Elem().Size() > abi.MapMaxKeyBytes {
flags |= abi.MapIndirectElem
}
c.Field("Flags").WriteUint32(flags)

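For reference, the group layout MapGroupType builds, written as ordinary Go. The slot count and size limits come from internal/abi; the value 8 for abi.MapGroupSlots is an assumption here, not taken from this diff:

    package main

    const mapGroupSlots = 8 // assumed value of abi.MapGroupSlots

    type group[K comparable, V any] struct {
        ctrl  uint64 // one control byte per slot, packed into a single word
        slots [mapGroupSlots]struct {
            key  K
            elem V
        }
    }

    func main() {
        var g group[int64, string] // oversized keys/elems would be stored as pointers
        _ = g
    }
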
View file

@ -1,305 +0,0 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reflectdata
import (
"internal/abi"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/rttype"
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/objabi"
"cmd/internal/src"
)
// OldMapBucketType makes the map bucket type given the type of the map.
func OldMapBucketType(t *types.Type) *types.Type {
// Builds a type representing a Bucket structure for
// the given map type. This type is not visible to users -
// we include only enough information to generate a correct GC
// program for it.
// Make sure this stays in sync with runtime/map.go.
//
// A "bucket" is a "struct" {
// tophash [abi.OldMapBucketCount]uint8
// keys [abi.OldMapBucketCount]keyType
// elems [abi.OldMapBucketCount]elemType
// overflow *bucket
// }
if t.MapType().OldBucket != nil {
return t.MapType().OldBucket
}
keytype := t.Key()
elemtype := t.Elem()
types.CalcSize(keytype)
types.CalcSize(elemtype)
if keytype.Size() > abi.OldMapMaxKeyBytes {
keytype = types.NewPtr(keytype)
}
if elemtype.Size() > abi.OldMapMaxElemBytes {
elemtype = types.NewPtr(elemtype)
}
field := make([]*types.Field, 0, 5)
// The first field is: uint8 topbits[BUCKETSIZE].
arr := types.NewArray(types.Types[types.TUINT8], abi.OldMapBucketCount)
field = append(field, makefield("topbits", arr))
arr = types.NewArray(keytype, abi.OldMapBucketCount)
arr.SetNoalg(true)
keys := makefield("keys", arr)
field = append(field, keys)
arr = types.NewArray(elemtype, abi.OldMapBucketCount)
arr.SetNoalg(true)
elems := makefield("elems", arr)
field = append(field, elems)
// If keys and elems have no pointers, the map implementation
// can keep a list of overflow pointers on the side so that
// buckets can be marked as having no pointers.
// Arrange for the bucket to have no pointers by changing
// the type of the overflow field to uintptr in this case.
// See comment on hmap.overflow in runtime/map.go.
otyp := types.Types[types.TUNSAFEPTR]
if !elemtype.HasPointers() && !keytype.HasPointers() {
otyp = types.Types[types.TUINTPTR]
}
overflow := makefield("overflow", otyp)
field = append(field, overflow)
// link up fields
bucket := types.NewStruct(field[:])
bucket.SetNoalg(true)
types.CalcSize(bucket)
// Check invariants that map code depends on.
if !types.IsComparable(t.Key()) {
base.Fatalf("unsupported map key type for %v", t)
}
if abi.OldMapBucketCount < 8 {
base.Fatalf("bucket size %d too small for proper alignment %d", abi.OldMapBucketCount, 8)
}
if uint8(keytype.Alignment()) > abi.OldMapBucketCount {
base.Fatalf("key align too big for %v", t)
}
if uint8(elemtype.Alignment()) > abi.OldMapBucketCount {
base.Fatalf("elem align %d too big for %v, BUCKETSIZE=%d", elemtype.Alignment(), t, abi.OldMapBucketCount)
}
if keytype.Size() > abi.OldMapMaxKeyBytes {
base.Fatalf("key size too large for %v", t)
}
if elemtype.Size() > abi.OldMapMaxElemBytes {
base.Fatalf("elem size too large for %v", t)
}
if t.Key().Size() > abi.OldMapMaxKeyBytes && !keytype.IsPtr() {
base.Fatalf("key indirect incorrect for %v", t)
}
if t.Elem().Size() > abi.OldMapMaxElemBytes && !elemtype.IsPtr() {
base.Fatalf("elem indirect incorrect for %v", t)
}
if keytype.Size()%keytype.Alignment() != 0 {
base.Fatalf("key size not a multiple of key align for %v", t)
}
if elemtype.Size()%elemtype.Alignment() != 0 {
base.Fatalf("elem size not a multiple of elem align for %v", t)
}
if uint8(bucket.Alignment())%uint8(keytype.Alignment()) != 0 {
base.Fatalf("bucket align not multiple of key align %v", t)
}
if uint8(bucket.Alignment())%uint8(elemtype.Alignment()) != 0 {
base.Fatalf("bucket align not multiple of elem align %v", t)
}
if keys.Offset%keytype.Alignment() != 0 {
base.Fatalf("bad alignment of keys in bmap for %v", t)
}
if elems.Offset%elemtype.Alignment() != 0 {
base.Fatalf("bad alignment of elems in bmap for %v", t)
}
// Double-check that overflow field is final memory in struct,
// with no padding at end.
if overflow.Offset != bucket.Size()-int64(types.PtrSize) {
base.Fatalf("bad offset of overflow in bmap for %v, overflow.Offset=%d, bucket.Size()-int64(types.PtrSize)=%d",
t, overflow.Offset, bucket.Size()-int64(types.PtrSize))
}
t.MapType().OldBucket = bucket
bucket.StructType().Map = t
return bucket
}
var oldHmapType *types.Type
// OldMapType returns a type interchangeable with runtime.hmap.
// Make sure this stays in sync with runtime/map.go.
func OldMapType() *types.Type {
if oldHmapType != nil {
return oldHmapType
}
// build a struct:
// type hmap struct {
// count int
// flags uint8
// B uint8
// noverflow uint16
// hash0 uint32
// buckets unsafe.Pointer
// oldbuckets unsafe.Pointer
// nevacuate uintptr
// clearSeq uint64
// extra unsafe.Pointer // *mapextra
// }
// must match runtime/map.go:hmap.
fields := []*types.Field{
makefield("count", types.Types[types.TINT]),
makefield("flags", types.Types[types.TUINT8]),
makefield("B", types.Types[types.TUINT8]),
makefield("noverflow", types.Types[types.TUINT16]),
makefield("hash0", types.Types[types.TUINT32]), // Used in walk.go for OMAKEMAP.
makefield("buckets", types.Types[types.TUNSAFEPTR]), // Used in walk.go for OMAKEMAP.
makefield("oldbuckets", types.Types[types.TUNSAFEPTR]),
makefield("nevacuate", types.Types[types.TUINTPTR]),
makefield("clearSeq", types.Types[types.TUINT64]),
makefield("extra", types.Types[types.TUNSAFEPTR]),
}
n := ir.NewDeclNameAt(src.NoXPos, ir.OTYPE, ir.Pkgs.Runtime.Lookup("hmap"))
hmap := types.NewNamed(n)
n.SetType(hmap)
n.SetTypecheck(1)
hmap.SetUnderlying(types.NewStruct(fields))
types.CalcSize(hmap)
// The size of hmap should be 56 bytes on 64 bit
// and 36 bytes on 32 bit platforms.
if size := int64(2*8 + 5*types.PtrSize); hmap.Size() != size {
base.Fatalf("hmap size not correct: got %d, want %d", hmap.Size(), size)
}
oldHmapType = hmap
return hmap
}
var oldHiterType *types.Type
// OldMapIterType returns a type interchangeable with runtime.hiter.
// Make sure this stays in sync with runtime/map.go.
func OldMapIterType() *types.Type {
if oldHiterType != nil {
return oldHiterType
}
hmap := OldMapType()
// build a struct:
// type hiter struct {
// key unsafe.Pointer // *Key
// elem unsafe.Pointer // *Elem
// t unsafe.Pointer // *OldMapType
// h *hmap
// buckets unsafe.Pointer
// bptr unsafe.Pointer // *bmap
// overflow unsafe.Pointer // *[]*bmap
// oldoverflow unsafe.Pointer // *[]*bmap
// startBucket uintptr
// offset uint8
// wrapped bool
// B uint8
// i uint8
// bucket uintptr
// checkBucket uintptr
// clearSeq uint64
// }
// must match runtime/map.go:hiter.
fields := []*types.Field{
makefield("key", types.Types[types.TUNSAFEPTR]), // Used in range.go for TMAP.
makefield("elem", types.Types[types.TUNSAFEPTR]), // Used in range.go for TMAP.
makefield("t", types.Types[types.TUNSAFEPTR]),
makefield("h", types.NewPtr(hmap)),
makefield("buckets", types.Types[types.TUNSAFEPTR]),
makefield("bptr", types.Types[types.TUNSAFEPTR]),
makefield("overflow", types.Types[types.TUNSAFEPTR]),
makefield("oldoverflow", types.Types[types.TUNSAFEPTR]),
makefield("startBucket", types.Types[types.TUINTPTR]),
makefield("offset", types.Types[types.TUINT8]),
makefield("wrapped", types.Types[types.TBOOL]),
makefield("B", types.Types[types.TUINT8]),
makefield("i", types.Types[types.TUINT8]),
makefield("bucket", types.Types[types.TUINTPTR]),
makefield("checkBucket", types.Types[types.TUINTPTR]),
makefield("clearSeq", types.Types[types.TUINT64]),
}
// build iterator struct holding the above fields
n := ir.NewDeclNameAt(src.NoXPos, ir.OTYPE, ir.Pkgs.Runtime.Lookup("hiter"))
hiter := types.NewNamed(n)
n.SetType(hiter)
n.SetTypecheck(1)
hiter.SetUnderlying(types.NewStruct(fields))
types.CalcSize(hiter)
if hiter.Size() != int64(8+12*types.PtrSize) {
base.Fatalf("hash_iter size not correct %d %d", hiter.Size(), 8+12*types.PtrSize)
}
oldHiterType = hiter
return hiter
}
func writeOldMapType(t *types.Type, lsym *obj.LSym, c rttype.Cursor) {
// internal/abi.OldMapType
s1 := writeType(t.Key())
s2 := writeType(t.Elem())
s3 := writeType(OldMapBucketType(t))
hasher := genhash(t.Key())
c.Field("Key").WritePtr(s1)
c.Field("Elem").WritePtr(s2)
c.Field("Bucket").WritePtr(s3)
c.Field("Hasher").WritePtr(hasher)
var flags uint32
// Note: flags must match maptype accessors in ../../../../runtime/type.go
// and maptype builder in ../../../../reflect/type.go:MapOf.
if t.Key().Size() > abi.OldMapMaxKeyBytes {
c.Field("KeySize").WriteUint8(uint8(types.PtrSize))
flags |= 1 // indirect key
} else {
c.Field("KeySize").WriteUint8(uint8(t.Key().Size()))
}
if t.Elem().Size() > abi.OldMapMaxElemBytes {
c.Field("ValueSize").WriteUint8(uint8(types.PtrSize))
flags |= 2 // indirect value
} else {
c.Field("ValueSize").WriteUint8(uint8(t.Elem().Size()))
}
c.Field("BucketSize").WriteUint16(uint16(OldMapBucketType(t).Size()))
if types.IsReflexive(t.Key()) {
flags |= 4 // reflexive key
}
if needkeyupdate(t.Key()) {
flags |= 8 // need key update
}
if hashMightPanic(t.Key()) {
flags |= 16 // hash might panic
}
c.Field("Flags").WriteUint32(flags)
if u := t.Underlying(); u != t {
// If t is a named map type, also keep the underlying map
// type live in the binary. This is important to make sure that
// a named map and that same map cast to its underlying type via
// reflection, use the same hash function. See issue 37716.
lsym.AddRel(base.Ctxt, obj.Reloc{Type: objabi.R_KEEP, Sym: writeType(u)})
}
}

View file

@ -8,7 +8,6 @@ import (
"encoding/binary"
"fmt"
"internal/abi"
"internal/buildcfg"
"slices"
"sort"
"strings"
@ -491,6 +490,9 @@ func dcommontype(c rttype.Cursor, t *types.Type) {
exported = types.IsExported(t.Elem().Sym().Name)
}
}
if types.IsDirectIface(t) {
tflag |= abi.TFlagDirectIface
}
if tflag != abi.TFlag(uint8(tflag)) {
// this should optimize away completely
@ -511,9 +513,6 @@ func dcommontype(c rttype.Cursor, t *types.Type) {
c.Field("FieldAlign_").WriteUint8(uint8(t.Alignment()))
kind := kinds[t.Kind()]
if types.IsDirectIface(t) {
kind |= abi.KindDirectIface
}
c.Field("Kind_").WriteUint8(uint8(kind))
c.Field("Equal").WritePtr(eqfunc)
@ -773,11 +772,7 @@ func writeType(t *types.Type) *obj.LSym {
rt = rttype.InterfaceType
dataAdd = len(imethods(t)) * int(rttype.IMethod.Size())
case types.TMAP:
if buildcfg.Experiment.SwissMap {
rt = rttype.SwissMapType
} else {
rt = rttype.OldMapType
}
rt = rttype.MapType
case types.TPTR:
rt = rttype.PtrType
// TODO: use rttype.Type for Elem() is ANY?
@ -877,11 +872,7 @@ func writeType(t *types.Type) *obj.LSym {
}
case types.TMAP:
if buildcfg.Experiment.SwissMap {
writeSwissMapType(t, lsym, c)
} else {
writeOldMapType(t, lsym, c)
}
writeMapType(t, lsym, c)
case types.TPTR:
// internal/abi.PtrType

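The direct-interface bit now travels in TFlag rather than Kind_. A self-contained sketch of the new check; the type and the bit position are stand-ins, only the constant names mirror internal/abi:

    package main

    import "fmt"

    type TFlag uint8

    const TFlagDirectIface TFlag = 1 << 5 // placeholder bit; the real value lives in internal/abi

    type rtype struct {
        TFlag TFlag
        Kind_ uint8 // the direct-iface bit no longer lives here
    }

    func isDirectIface(t *rtype) bool {
        // Previously: t.Kind_ & KindDirectIface != 0
        return t.TFlag&TFlagDirectIface != 0
    }

    func main() {
        fmt.Println(isDirectIface(&rtype{TFlag: TFlagDirectIface})) // true
    }
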
View file

@ -14,6 +14,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/riscv"
"internal/abi"
)
// ssaRegToReg maps ssa register numbers to obj register numbers.
@ -508,12 +509,91 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_EXTERN
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpRISCV64LoweredPanicBoundsA, ssa.OpRISCV64LoweredPanicBoundsB, ssa.OpRISCV64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpRISCV64LoweredPanicBoundsRR, ssa.OpRISCV64LoweredPanicBoundsRC, ssa.OpRISCV64LoweredPanicBoundsCR, ssa.OpRISCV64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpRISCV64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - riscv.REG_X5)
yIsReg = true
yVal = int(v.Args[1].Reg() - riscv.REG_X5)
case ssa.OpRISCV64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - riscv.REG_X5)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_X5 + int16(yVal)
}
case ssa.OpRISCV64LoweredPanicBoundsCR:
yIsReg = true
yVal = int(v.Args[0].Reg() - riscv.REG_X5)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_X5 + int16(xVal)
}
case ssa.OpRISCV64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_X5 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = riscv.REG_X5 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpRISCV64LoweredAtomicLoad8:
s.Prog(riscv.AFENCE)

View file

@ -27,8 +27,7 @@ var ArrayType *types.Type
var ChanType *types.Type
var FuncType *types.Type
var InterfaceType *types.Type
var OldMapType *types.Type
var SwissMapType *types.Type
var MapType *types.Type
var PtrType *types.Type
var SliceType *types.Type
var StructType *types.Type
@ -55,8 +54,7 @@ func Init() {
ChanType = FromReflect(reflect.TypeOf(abi.ChanType{}))
FuncType = FromReflect(reflect.TypeOf(abi.FuncType{}))
InterfaceType = FromReflect(reflect.TypeOf(abi.InterfaceType{}))
OldMapType = FromReflect(reflect.TypeOf(abi.OldMapType{}))
SwissMapType = FromReflect(reflect.TypeOf(abi.SwissMapType{}))
MapType = FromReflect(reflect.TypeOf(abi.MapType{}))
PtrType = FromReflect(reflect.TypeOf(abi.PtrType{}))
SliceType = FromReflect(reflect.TypeOf(abi.SliceType{}))
StructType = FromReflect(reflect.TypeOf(abi.StructType{}))

View file

@ -15,6 +15,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/s390x"
"internal/abi"
)
// ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
@ -281,6 +282,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpS390XCPSDR:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.Reg = v.Args[0].Reg()
case ssa.OpS390XWFMAXDB, ssa.OpS390XWFMAXSB,
ssa.OpS390XWFMINDB, ssa.OpS390XWFMINSB:
p := opregregimm(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg(), 1 /* Java Math.Max() */)
p.AddRestSource(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
ssa.OpS390XMODD, ssa.OpS390XMODW,
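
The immediate 1 selects the vector-FP mode whose NaN and signed-zero handling matches Java's Math.max, which is also what Go's built-in min and max require: NaN arguments propagate, and -0 orders below +0. Those semantics are checkable on any GOARCH:

    package main

    import (
        "fmt"
        "math"
    )

    func main() {
        nan := math.NaN()
        negZero := math.Copysign(0, -1)
        fmt.Println(max(1.0, nan))                   // NaN: NaN propagates
        fmt.Println(math.Signbit(min(negZero, 0.0))) // true: -0 orders below +0
    }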
@ -569,12 +574,92 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Name = obj.NAME_EXTERN
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpS390XLoweredPanicBoundsA, ssa.OpS390XLoweredPanicBoundsB, ssa.OpS390XLoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpS390XLoweredPanicBoundsRR, ssa.OpS390XLoweredPanicBoundsRC, ssa.OpS390XLoweredPanicBoundsCR, ssa.OpS390XLoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpS390XLoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - s390x.REG_R0)
yIsReg = true
yVal = int(v.Args[1].Reg() - s390x.REG_R0)
case ssa.OpS390XLoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - s390x.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(s390x.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = s390x.REG_R0 + int16(yVal)
}
case ssa.OpS390XLoweredPanicBoundsCR:
yIsReg = true
yVal = int(v.Args[0].Reg() - s390x.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
if xVal == yVal {
xVal = 1
}
p := s.Prog(s390x.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = s390x.REG_R0 + int16(xVal)
}
case ssa.OpS390XLoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(s390x.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = s390x.REG_R0 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(s390x.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = s390x.REG_R0 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpS390XFLOGR, ssa.OpS390XPOPCNT,
ssa.OpS390XNEG, ssa.OpS390XNEGW,
ssa.OpS390XMOVWBR, ssa.OpS390XMOVDBR:

View file

@ -363,13 +363,16 @@
// Write barrier.
(WB ...) => (LoweredWB ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(PanicExtend ...) => (LoweredPanicExtendRR ...)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem)
(LoweredPanicBoundsRR [kind] x (MOVLconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicBoundsRR [kind] (MOVLconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVLconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
(LoweredPanicExtendRR [kind] hi lo (MOVLconst [c]) mem) => (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicExtendRR [kind] (MOVLconst [hi]) (MOVLconst [lo]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
(LoweredPanicExtendRC [kind] {p} (MOVLconst [hi]) (MOVLconst [lo]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
// ***************************
// Above: lowering rules

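The Extend rules fold a constant 64-bit index back together from its 32-bit halves: hi carries the sign, lo is reinterpreted as unsigned. The arithmetic in isolation:

    package main

    import "fmt"

    func main() {
        hi, lo := int32(-1), int32(-2)
        c := int64(hi)<<32 + int64(uint32(lo))
        fmt.Printf("%#x\n", uint64(c)) // 0xfffffffffffffffe
    }
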
View file

@ -76,7 +76,6 @@ func init() {
cx = buildReg("CX")
dx = buildReg("DX")
bx = buildReg("BX")
si = buildReg("SI")
gp = buildReg("AX CX DX BX BP SI DI")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7")
gpsp = gp | buildReg("SP")
@ -523,16 +522,19 @@ func init() {
// Returns a pointer to a write barrier buffer in DI.
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: callerSave &^ gp, outputs: []regMask{buildReg("DI")}}, clobberFlags: true, aux: "Int64"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// Extend ops are the same as Bounds ops except the indexes are 64-bit.
{name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, dx, bx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, cx, dx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, ax, cx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// the RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp, gp}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Same as above, but the x value is 64 bits.
{name: "LoweredPanicExtendRR", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{ax | cx | dx | bx, ax | cx | dx | bx, gp}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=y, arg3=mem, returns memory.
{name: "LoweredPanicExtendRC", argLength: 3, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{ax | cx | dx | bx, ax | cx | dx | bx}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=mem, returns memory.
// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the

View file

@ -375,34 +375,17 @@
(MOVQstoreconst [makeValAndOff(0,int32(s-8))] destptr
(MOVQstoreconst [makeValAndOff(0,0)] destptr mem))
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 =>
(Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
// Zeroing up to 192 bytes uses straightline code.
(Zero [s] destptr mem) && s >= 16 && s < 192 => (LoweredZero [s] destptr mem)
(Zero [16] destptr mem) =>
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
(Zero [32] destptr mem) =>
(MOVOstoreconst [makeValAndOff(0,16)] destptr
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [48] destptr mem) =>
(MOVOstoreconst [makeValAndOff(0,32)] destptr
(MOVOstoreconst [makeValAndOff(0,16)] destptr
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
(Zero [64] destptr mem) =>
(MOVOstoreconst [makeValAndOff(0,48)] destptr
(MOVOstoreconst [makeValAndOff(0,32)] destptr
(MOVOstoreconst [makeValAndOff(0,16)] destptr
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))
// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
&& s > 64 && s <= 1024 && s%16 == 0 =>
(DUFFZERO [s] destptr mem)
// Zeroing up to ~1KB uses a small loop.
(Zero [s] destptr mem) && s >= 192 && s <= repZeroThreshold => (LoweredZeroLoop [s] destptr mem)
// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
&& s > 1024 && s%8 == 0 =>
(Zero [s] destptr mem) && s > repZeroThreshold && s%8 != 0 =>
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
(Zero [s] destptr mem) && s > repZeroThreshold && s%8 == 0 =>
(REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
// Lowering constants
@ -558,9 +541,11 @@
// Write barrier.
(WB ...) => (LoweredWB ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVQconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVQconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVQconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVQconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// lowering rotates
(RotateLeft8 ...) => (ROLB ...)

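Taken together, the Zero rules above pick a strategy by size class. A plain-Go rendering of the dispatch; repZeroThreshold is the cutover constant named in the rules, and the 1024 used below is only a placeholder for it (odd sizes above the threshold are first rounded to a multiple of 8 with an overlapping head store):

    package main

    import "fmt"

    func zeroStrategy(s, repZeroThreshold int64) string {
        switch {
        case s < 16:
            return "inline constant stores"
        case s < 192:
            return "LoweredZero: straight-line 16-byte stores"
        case s <= repZeroThreshold:
            return "LoweredZeroLoop: generated loop"
        default:
            return "REPSTOSQ"
        }
    }

    func main() {
        for _, s := range []int64{8, 64, 256, 4096} {
            fmt.Println(s, zeroStrategy(s, 1024))
        }
    }
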
View file

@ -121,7 +121,6 @@ func init() {
ax = buildReg("AX")
cx = buildReg("CX")
dx = buildReg("DX")
bx = buildReg("BX")
gp = buildReg("AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15")
g = buildReg("g")
fp = buildReg("X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14")
@ -964,15 +963,30 @@ func init() {
// auxint = # of bytes to zero
// returns mem
{
name: "DUFFZERO",
name: "LoweredZero",
aux: "Int64",
argLength: 2,
reg: regInfo{
inputs: []regMask{buildReg("DI")},
clobbers: buildReg("DI"),
inputs: []regMask{gp},
},
//faultOnNilArg0: true, // Note: removed for 73748. TODO: reenable at some point
unsafePoint: true, // FP maintenance around DUFFCOPY can be clobbered by interrupts
faultOnNilArg0: true,
},
// arg0 = pointer to start of memory to zero
// arg1 = mem
// auxint = # of bytes to zero
// returns mem
{
name: "LoweredZeroLoop",
aux: "Int64",
argLength: 2,
reg: regInfo{
inputs: []regMask{gp},
clobbersArg0: true,
},
clobberFlags: true,
faultOnNilArg0: true,
needIntTemp: true,
},
// arg0 = address of memory to zero
@ -1060,12 +1074,15 @@ func init() {
{name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "UInt64", aux: "Sym", symEffect: "None"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// the RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp, gp}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the

View file

@ -395,13 +395,16 @@
// Write barrier.
(WB ...) => (LoweredWB ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(PanicExtend ...) => (LoweredPanicExtendRR ...)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem)
(LoweredPanicBoundsRR [kind] x (MOVWconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicBoundsRR [kind] (MOVWconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVWconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
(LoweredPanicExtendRR [kind] hi lo (MOVWconst [c]) mem) => (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicExtendRR [kind] (MOVWconst [hi]) (MOVWconst [lo]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
(LoweredPanicExtendRC [kind] {p} (MOVWconst [hi]) (MOVWconst [lo]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
// Optimizations

View file

@ -601,9 +601,11 @@
// Publication barrier (0xe is ST option)
(PubBarrier mem) => (DMB [0xe] mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// Optimizations
@ -683,6 +685,14 @@
((EQ|NE) (CMPconst [0] x) yes no) => ((Z|NZ) x yes no)
((EQ|NE) (CMPWconst [0] x) yes no) => ((ZW|NZW) x yes no)
((ULE|UGT) (CMPconst [0] x)) => ((EQ|NE) (CMPconst [0] x))
((ULE|UGT) (CMPWconst [0] x)) => ((EQ|NE) (CMPWconst [0] x))
((Z|NZ) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMP x y))
((ZW|NZW) sub:(SUB x y)) && sub.Uses == 1 => ((EQ|NE) (CMPW x y))
((Z|NZ) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPconst [c] y))
((ZW|NZW) sub:(SUBconst [c] y)) && sub.Uses == 1 => ((EQ|NE) (CMPWconst [int32(c)] y))
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MADD a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMN a (MUL <x.Type> x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPconst [0] z:(MSUB a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMP a (MUL <x.Type> x y)) yes no)
((EQ|NE|LT|LE|GT|GE) (CMPWconst [0] z:(MADDW a x y)) yes no) && z.Uses==1 => ((EQ|NE|LTnoov|LEnoov|GTnoov|GEnoov) (CMNW a (MULW <x.Type> x y)) yes no)
@ -1658,6 +1668,10 @@
(SRLconst [rc] (MOVHUreg x)) && rc >= 16 => (MOVDconst [0])
(SRLconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVDconst [0])
// Special cases for slice operations
(ADD x0 x1:(ANDshiftRA x2:(SLLconst [sl] y) z [63])) && x1.Uses == 1 && x2.Uses == 1 => (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl])
(ADD x0 x1:(ANDshiftLL x2:(SRAconst [63] z) y [sl])) && x1.Uses == 1 && x2.Uses == 1 => (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl])
// bitfield ops
// sbfiz

View file
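
The flag-fusion rules in the hunk above rest on two small identities; a sketch in plain Go (illustrative only, not compiler code):

	// For unsigned x, x <= 0 holds exactly when x == 0, which is why
	// ULE/UGT of a compare-with-zero collapses to EQ/NE.
	func uleZero(x uint64) bool { return x <= 0 } // same as x == 0

	// x-y == 0 exactly when x == y (wraparound included), which is why
	// (Z (SUB x y)) becomes (EQ (CMP x y)) when the SUB has no other uses.
	func zSub(x, y int64) bool { return x-y == 0 } // same as x == y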

@ -144,11 +144,8 @@ func init() {
gpspsbg = gpspg | buildReg("SB")
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r0 = buildReg("R0")
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
rz = buildReg("ZERO")
first16 = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15")
)
// Common regInfo
var (
@ -760,12 +757,15 @@ func init() {
// Returns a pointer to a write barrier buffer in R25.
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R16 R17 R30"), outputs: []regMask{buildReg("R25")}}, clobberFlags: true, aux: "Int64"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16, first16}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Prefetch instruction
// Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option.

View file
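
Several of the ops files in this change replace individual r0...r3 masks with a single first16 mask. A register mask is just a bit set over the architecture's register numbering; a hypothetical sketch of what a helper like buildReg boils down to (the real one maps names through the arch's register list):

	// maskOf ORs one bit per register number into a regMask-style value;
	// maskOf(0, 1, ..., 15) plays the role of first16 above.
	func maskOf(regs ...uint) uint64 {
		var m uint64
		for _, r := range regs {
			m |= 1 << r
		}
		return m
	}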

@ -94,11 +94,11 @@ func init() {
gpspsbg = gpspg | buildReg("SB")
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15")
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
lr = buildReg("R14")
r0 = buildReg("R0")
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
r4 = buildReg("R4")
)
// Common regInfo
var (
@ -540,16 +540,19 @@ func init() {
// See runtime/stubs.go for a more detailed discussion.
{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// Extend ops are the same as Bounds ops except the indexes are 64-bit.
{name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r2, r3}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r1, r2}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r0, r1}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp &^ lr, gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Same as above, but the x value is 64 bits.
{name: "LoweredPanicExtendRR", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r0 | r1 | r2 | r3, r0 | r1 | r2 | r3, gp}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=y, arg3=mem, returns memory.
{name: "LoweredPanicExtendRC", argLength: 3, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{r0 | r1 | r2 | r3, r0 | r1 | r2 | r3}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=mem, returns memory.
// Constant flag value.
// Note: there's an "unordered" outcome for floating-point

View file

@ -527,9 +527,11 @@
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVVconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVVconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVVconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVVconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
(CondSelect <t> x y cond) => (OR (MASKEQZ <t> x cond) (MASKNEZ <t> y cond))
@ -748,10 +750,10 @@
(SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0])
// mul by constant
(MULV x (MOVVconst [-1])) => (NEGV x)
(MULV _ (MOVVconst [0])) => (MOVVconst [0])
(MULV x (MOVVconst [1])) => x
(MULV x (MOVVconst [c])) && isPowerOfTwo(c) => (SLLVconst [log64(c)] x)
(MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
// div by constant
(DIVVU x (MOVVconst [1])) => x
@ -843,6 +845,14 @@
(MOVBUreg (ANDconst [c] x)) => (ANDconst [c&0xff] x)
// Avoid extending when already sufficiently masked.
(MOVBreg x:(ANDconst [c] y)) && c >= 0 && int64(int8(c)) == c => x
(MOVHreg x:(ANDconst [c] y)) && c >= 0 && int64(int16(c)) == c => x
(MOVWreg x:(ANDconst [c] y)) && c >= 0 && int64(int32(c)) == c => x
(MOVBUreg x:(ANDconst [c] y)) && c >= 0 && int64(uint8(c)) == c => x
(MOVHUreg x:(ANDconst [c] y)) && c >= 0 && int64(uint16(c)) == c => x
(MOVWUreg x:(ANDconst [c] y)) && c >= 0 && int64(uint32(c)) == c => x
// Prefetch instructions (hint specified using aux field)
// For PRELD{,X} A value of hint indicates:
// hint=0 is defined as load prefetch to L1-cache

View file
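
The "already sufficiently masked" rules in the hunk above rely on one fact: if c is non-negative and fits in the target signed width, then x&c also fits, so the sign extension is a no-op. An illustrative check for the 8-bit case (assuming 0 <= c <= 127):

	// With 0 <= c <= 127, x&c lands in [0, 127], so a round trip through
	// int8 returns it unchanged; that is what lets (MOVBreg (ANDconst [c] y))
	// be dropped.
	func extendIsNoop(x, c int64) bool {
		masked := x & c
		return int64(int8(masked)) == masked // always true under the assumption
	}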

@ -130,10 +130,7 @@ func init() {
gpspsbg = gpspg | buildReg("SB")
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r1 = buildReg("R20")
r2 = buildReg("R21")
r3 = buildReg("R23")
r4 = buildReg("R24")
first16 = buildReg("R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19")
)
// Common regInfo
var (
@ -563,12 +560,15 @@ func init() {
// Do data barrier. arg0=memory
{name: "LoweredPubBarrier", argLength: 1, asm: "DBAR", hasSideEffects: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16, first16}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Prefetch instruction
// Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option.

View file

@ -0,0 +1,6 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Prefer addition when shifting left by one.
(SLLVconst [1] x) => (ADDV x x)

View file
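
This one-rule file is the flip side of the recipe costs further down in config.go: shift-by-one is kept through the main rules so shift combining stays simple, then flipped to an add at the very end, since the two forms compute the same value:

	func double(x int64) int64 { return x + x } // identical to x << 1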

@ -423,13 +423,17 @@
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 0 => (LoweredPanicExtendA [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 1 => (LoweredPanicExtendB [kind] hi lo y mem)
(PanicExtend [kind] hi lo y mem) && boundsABI(kind) == 2 => (LoweredPanicExtendC [kind] hi lo y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(PanicExtend ...) => (LoweredPanicExtendRR ...)
(LoweredPanicBoundsRR [kind] x (MOVWconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicBoundsRR [kind] (MOVWconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVWconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
(LoweredPanicExtendRR [kind] hi lo (MOVWconst [c]) mem) => (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
(LoweredPanicExtendRR [kind] (MOVWconst [hi]) (MOVWconst [lo]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
(LoweredPanicExtendRC [kind] {p} (MOVWconst [hi]) (MOVWconst [lo]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
// Optimizations
@ -607,13 +611,13 @@
(Select0 (MULTU (MOVWconst [1]) _ )) => (MOVWconst [0])
(Select1 (MULTU (MOVWconst [-1]) x )) => (NEG <x.Type> x)
(Select0 (MULTU (MOVWconst [-1]) x )) => (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
(Select1 (MULTU (MOVWconst [c]) x )) && isUnsignedPowerOfTwo(uint32(c)) => (SLLconst [int32(log32u(uint32(c)))] x)
(Select0 (MULTU (MOVWconst [c]) x )) && isUnsignedPowerOfTwo(uint32(c)) => (SRLconst [int32(32-log32u(uint32(c)))] x)
(MUL (MOVWconst [0]) _ ) => (MOVWconst [0])
(MUL (MOVWconst [1]) x ) => x
(MUL (MOVWconst [-1]) x ) => (NEG x)
(MUL (MOVWconst [c]) x ) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
(MUL (MOVWconst [c]) x ) && isUnsignedPowerOfTwo(uint32(c)) => (SLLconst [int32(log32u(uint32(c)))] x)
// generic simplifications
(ADD x (NEG y)) => (SUB x y)

View file
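
These MIPS rules (and the generic rules later in this diff) switch from a signed to an unsigned power-of-two test. A sketch of why that matters, assuming the usual bit-trick implementation (the real helpers are isUnsignedPowerOfTwo and log32u in the ssa package):

	package main

	import "fmt"

	// A nonzero x that shares no bits with x-1 has exactly one bit set.
	func isUnsignedPowerOfTwo(x uint64) bool { return x != 0 && x&(x-1) == 0 }

	func main() {
		x := uint64(1) << 31
		fmt.Println(isUnsignedPowerOfTwo(x)) // true
		fmt.Println(int32(x))                // -2147483648: negative when viewed
		// as signed, which is why a signed power-of-two check misses this case.
	}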

@ -479,9 +479,11 @@
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVVconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVVconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVVconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVVconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// Optimizations

View file

@ -136,10 +136,7 @@ func init() {
lo = buildReg("LO")
hi = buildReg("HI")
callerSave = gp | fp | lo | hi | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
r4 = buildReg("R4")
first16 = buildReg("R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16")
)
// Common regInfo
var (
@ -469,12 +466,15 @@ func init() {
// Do data barrier. arg0=memory
{name: "LoweredPubBarrier", argLength: 1, asm: "SYNC", hasSideEffects: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16, first16}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
}
blocks := []blockData{

View file

@ -120,11 +120,8 @@ func init() {
lo = buildReg("LO")
hi = buildReg("HI")
callerSave = gp | fp | lo | hi | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
r4 = buildReg("R4")
r5 = buildReg("R5")
first16 = buildReg("R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16")
first4 = buildReg("R1 R2 R3 R4")
)
// Common regInfo
var (
@ -411,16 +408,19 @@ func init() {
// Do data barrier. arg0=memory
{name: "LoweredPubBarrier", argLength: 1, asm: "SYNC", hasSideEffects: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// Extend ops are the same as Bounds ops except the indexes are 64-bit.
{name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r3, r4}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r2, r3}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r5, r1, r2}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16, first16}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Same as above, but the x value is 64 bits.
{name: "LoweredPanicExtendRR", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{first4, first4, first16}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=y, arg3=mem, returns memory.
{name: "LoweredPanicExtendRC", argLength: 3, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first4, first4}}, typ: "Mem", call: true}, // arg0=x_hi, arg1=x_lo, arg2=mem, returns memory.
}
blocks := []blockData{

View file

@ -407,9 +407,11 @@
// Publication barrier as intrinsic
(PubBarrier ...) => (LoweredPubBarrier ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// Small moves
(Move [0] _ _ mem) => mem

View file

@ -49,7 +49,7 @@ func riscv64RegName(r int) string {
func init() {
var regNamesRISCV64 []string
var gpMask, fpMask, gpgMask, gpspMask, gpspsbMask, gpspsbgMask regMask
var gpMask, fpMask, gpgMask, gpspMask, gpspsbMask, gpspsbgMask, first16Mask regMask
regNamed := make(map[string]regMask)
// Build the list of register names, creating an appropriately indexed
@ -93,6 +93,9 @@ func init() {
gpspMask |= mask
gpspsbMask |= mask
gpspsbgMask |= mask
if r >= 5 && r < 5+16 {
first16Mask |= mask
}
}
}
@ -429,12 +432,15 @@ func init() {
// Do data barrier. arg0=memory
{name: "LoweredPubBarrier", argLength: 1, asm: "FENCE", hasSideEffects: true},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X7"], regNamed["X28"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X6"], regNamed["X7"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{regNamed["X5"], regNamed["X6"]}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16Mask, first16Mask}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16Mask}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16Mask}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// F extension.
{name: "FADDS", argLength: 2, reg: fp21, asm: "FADDS", commutative: true, typ: "Float32"}, // arg0 + arg1

View file

@ -145,6 +145,9 @@
(Sqrt32 ...) => (FSQRTS ...)
(Max(64|32)F ...) => (WFMAX(D|S)B ...)
(Min(64|32)F ...) => (WFMIN(D|S)B ...)
// Atomic loads and stores.
// The SYNC instruction (fast-BCR-serialization) prevents store-load
// reordering. Other sequences of memory operations (load-load,
@ -455,9 +458,11 @@
// Write barrier.
(WB ...) => (LoweredWB ...)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// ***************************
// Above: lowering rules

View file

@ -114,6 +114,7 @@ func init() {
sb = buildReg("SB")
r0 = buildReg("R0")
tmp = buildReg("R11") // R11 is used as a temporary in a small number of instructions.
lr = buildReg("R14")
// R10 is reserved by the assembler.
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12 R14")
@ -222,6 +223,12 @@ func init() {
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 clear sign bit
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
// Single element vector floating point min / max instructions
{name: "WFMAXDB", argLength: 2, reg: fp21, asm: "WFMAXDB", typ: "Float64"}, // max[float64](arg0, arg1)
{name: "WFMAXSB", argLength: 2, reg: fp21, asm: "WFMAXSB", typ: "Float32"}, // max[float32](arg0, arg1)
{name: "WFMINDB", argLength: 2, reg: fp21, asm: "WFMINDB", typ: "Float64"}, // min[float64](arg0, arg1)
{name: "WFMINSB", argLength: 2, reg: fp21, asm: "WFMINSB", typ: "Float32"}, // min[float32](arg0, arg1)
// Round to integer, float64 only.
//
// aux | rounding mode
@ -512,12 +519,15 @@ func init() {
// Returns a pointer to a write barrier buffer in R9.
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R14") | r1, outputs: []regMask{r9}}, clobberFlags: true, aux: "Int64"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{gp &^ lr, gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{gp &^ lr}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Constant condition code values. The condition code can be 0, 1, 2 or 3.
{name: "FlagEQ"}, // CC=0 (equal)

View file
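
With the WFMIN/WFMAX ops above wired to the Max(64|32)F and Min(64|32)F rules, s390x can lower float min/max to single instructions. A sketch of a typical beneficiary (ordinary user code, not compiler code; whether a given call is intrinsified depends on the min/max lowering in use):

	import "math"

	// clamp is the kind of code that now maps onto WFMAXDB/WFMINDB.
	func clamp(x, lo, hi float64) float64 {
		return math.Min(math.Max(x, lo), hi)
	}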

@ -295,6 +295,10 @@
(Neq16 (Const16 <t> [c]) (Add16 (Const16 <t> [d]) x)) => (Neq16 (Const16 <t> [c-d]) x)
(Neq8 (Const8 <t> [c]) (Add8 (Const8 <t> [d]) x)) => (Neq8 (Const8 <t> [c-d]) x)
(CondSelect x _ (ConstBool [true ])) => x
(CondSelect _ y (ConstBool [false])) => y
(CondSelect x x _) => x
// signed integer range: ( c <= x && x (<|<=) d ) -> ( unsigned(x-c) (<|<=) unsigned(d-c) )
(AndB (Leq64 (Const64 [c]) x) ((Less|Leq)64 x (Const64 [d]))) && d >= c => ((Less|Leq)64U (Sub64 <x.Type> x (Const64 <x.Type> [c])) (Const64 <x.Type> [d-c]))
(AndB (Leq32 (Const32 [c]) x) ((Less|Leq)32 x (Const32 [d]))) && d >= c => ((Less|Leq)32U (Sub32 <x.Type> x (Const32 <x.Type> [c])) (Const32 <x.Type> [d-c]))
@ -1010,11 +1014,10 @@
// See ../magic.go for a detailed description of these algorithms.
// Unsigned divide by power of 2. Strength reduce to a shift.
(Div8u n (Const8 [c])) && isPowerOfTwo(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
(Div16u n (Const16 [c])) && isPowerOfTwo(c) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16(c)]))
(Div32u n (Const32 [c])) && isPowerOfTwo(c) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32(c)]))
(Div64u n (Const64 [c])) && isPowerOfTwo(c) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64(c)]))
(Div64u n (Const64 [-1<<63])) => (Rsh64Ux64 n (Const64 <typ.UInt64> [63]))
(Div8u n (Const8 [c])) && isUnsignedPowerOfTwo(uint8(c)) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8u(uint8(c))]))
(Div16u n (Const16 [c])) && isUnsignedPowerOfTwo(uint16(c)) => (Rsh16Ux64 n (Const64 <typ.UInt64> [log16u(uint16(c))]))
(Div32u n (Const32 [c])) && isUnsignedPowerOfTwo(uint32(c)) => (Rsh32Ux64 n (Const64 <typ.UInt64> [log32u(uint32(c))]))
(Div64u n (Const64 [c])) && isUnsignedPowerOfTwo(uint64(c)) => (Rsh64Ux64 n (Const64 <typ.UInt64> [log64u(uint64(c))]))
// Signed non-negative divide by power of 2.
(Div8 n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo(c) => (Rsh8Ux64 n (Const64 <typ.UInt64> [log8(c)]))
@ -1290,11 +1293,10 @@
(Const64 <typ.UInt64> [63])))
// Unsigned mod by power of 2 constant.
(Mod8u <t> n (Const8 [c])) && isPowerOfTwo(c) => (And8 n (Const8 <t> [c-1]))
(Mod16u <t> n (Const16 [c])) && isPowerOfTwo(c) => (And16 n (Const16 <t> [c-1]))
(Mod32u <t> n (Const32 [c])) && isPowerOfTwo(c) => (And32 n (Const32 <t> [c-1]))
(Mod64u <t> n (Const64 [c])) && isPowerOfTwo(c) => (And64 n (Const64 <t> [c-1]))
(Mod64u <t> n (Const64 [-1<<63])) => (And64 n (Const64 <t> [1<<63-1]))
(Mod8u <t> n (Const8 [c])) && isUnsignedPowerOfTwo(uint8(c)) => (And8 n (Const8 <t> [c-1]))
(Mod16u <t> n (Const16 [c])) && isUnsignedPowerOfTwo(uint16(c)) => (And16 n (Const16 <t> [c-1]))
(Mod32u <t> n (Const32 [c])) && isUnsignedPowerOfTwo(uint32(c)) => (And32 n (Const32 <t> [c-1]))
(Mod64u <t> n (Const64 [c])) && isUnsignedPowerOfTwo(uint64(c)) => (And64 n (Const64 <t> [c-1]))
// Signed non-negative mod by power of 2 constant.
(Mod8 <t> n (Const8 [c])) && isNonNegative(n) && isPowerOfTwo(c) => (And8 n (Const8 <t> [c-1]))
@ -2053,9 +2055,7 @@
(Select1 (MakeTuple x y)) => y
// for rewriting results of some late-expanded rewrites (below)
(SelectN [0] (MakeResult x ___)) => x
(SelectN [1] (MakeResult x y ___)) => y
(SelectN [2] (MakeResult x y z ___)) => z
(SelectN [n] m:(MakeResult ___)) => m.Args[n]
// for late-expanded calls, recognize newobject and remove zeroing and nilchecks
(Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
@ -2843,3 +2843,19 @@
&& clobber(sbts)
&& clobber(key)
=> (StaticLECall {f} [argsize] dict_ (StringMake <typ.String> ptr len) mem)
// Transform some CondSelect into math operations.
// if b { x++ } => x += b // but not on arm64 because it has CSINC
(CondSelect (Add8 <t> x (Const8 [1])) x bool) && config.arch != "arm64" => (Add8 x (CvtBoolToUint8 <t> bool))
(CondSelect (Add(64|32|16) <t> x (Const(64|32|16) [1])) x bool) && config.arch != "arm64" => (Add(64|32|16) x (ZeroExt8to(64|32|16) <t> (CvtBoolToUint8 <types.Types[types.TUINT8]> bool)))
// if b { x-- } => x -= b
(CondSelect (Add8 <t> x (Const8 [-1])) x bool) => (Sub8 x (CvtBoolToUint8 <t> bool))
(CondSelect (Add(64|32|16) <t> x (Const(64|32|16) [-1])) x bool) => (Sub(64|32|16) x (ZeroExt8to(64|32|16) <t> (CvtBoolToUint8 <types.Types[types.TUINT8]> bool)))
// if b { x <<= 1 } => x <<= b
(CondSelect (Lsh(64|32|16|8)x64 x (Const64 [1])) x bool) => (Lsh(64|32|16|8)x8 [true] x (CvtBoolToUint8 <types.Types[types.TUINT8]> bool))
// if b { x >>= 1 } => x >>= b
(CondSelect (Rsh(64|32|16|8)x64 x (Const64 [1])) x bool) => (Rsh(64|32|16|8)x8 [true] x (CvtBoolToUint8 <types.Types[types.TUINT8]> bool))
(CondSelect (Rsh(64|32|16|8)Ux64 x (Const64 [1])) x bool) => (Rsh(64|32|16|8)Ux8 [true] x (CvtBoolToUint8 <types.Types[types.TUINT8]> bool))

View file
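
The CondSelect rules at the end of this hunk turn conditional plus/minus-one and shift-by-one updates into branchless arithmetic on the bool, provided branchelim has first formed a CondSelect (hence the pass-order change below, which moves branchelim ahead of late opt). Source shapes they target, illustratively:

	// if b { x++ }  =>  x += bool-as-int (via CvtBoolToUint8 + zero extend);
	// skipped on arm64, which has CSINC for this.
	func incIf(x int64, b bool) int64 {
		if b {
			x++
		}
		return x
	}

	// if b { x >>= 1 }  =>  x >>= bool-as-int (shift amount 0 or 1).
	func halveIf(x uint64, b bool) uint64 {
		if b {
			x >>= 1
		}
		return x
	}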

@ -88,6 +88,10 @@ type regInfo struct {
// clobbers encodes the set of registers that are overwritten by
// the instruction (other than the output registers).
clobbers regMask
// Instruction clobbers the register containing input 0.
clobbersArg0 bool
// Instruction clobbers the register containing input 1.
clobbersArg1 bool
// outputs[i] encodes the set of registers allowed for the i'th output.
outputs []regMask
}
@ -294,7 +298,7 @@ func genOp() {
fmt.Fprintf(w, "argLen: %d,\n", v.argLength)
if v.rematerializeable {
if v.reg.clobbers != 0 {
if v.reg.clobbers != 0 || v.reg.clobbersArg0 || v.reg.clobbersArg1 {
log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
}
if v.clobberFlags {
@ -403,6 +407,12 @@ func genOp() {
if v.reg.clobbers > 0 {
fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
}
if v.reg.clobbersArg0 {
fmt.Fprintf(w, "clobbersArg0: true,\n")
}
if v.reg.clobbersArg1 {
fmt.Fprintf(w, "clobbersArg1: true,\n")
}
// reg outputs
s = s[:0]

View file
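
For a feel of how the new regInfo flags read at an op definition site, a hypothetical entry (the op name is invented purely for illustration):

	// An op that destroys the register holding its first input declares
	// that fact, so regalloc keeps no live value there across it.
	{name: "DESTRUCTIVEOP", argLength: 2, reg: regInfo{
		inputs:       []regMask{gp, gp},
		clobbersArg0: true,
		outputs:      []regMask{gp},
	}},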

@ -549,6 +549,13 @@ func (u *unusedInspector) node(node ast.Node) {
}
}
case *ast.BasicLit:
case *ast.CompositeLit:
for _, e := range node.Elts {
u.node(e)
}
case *ast.KeyValueExpr:
u.node(node.Key)
u.node(node.Value)
case *ast.ValueSpec:
u.exprs(node.Values)
default:
@ -1440,7 +1447,8 @@ func parseValue(val string, arch arch, loc string) (op opData, oparch, typ, auxi
func opHasAuxInt(op opData) bool {
switch op.aux {
case "Bool", "Int8", "Int16", "Int32", "Int64", "Int128", "UInt8", "Float32", "Float64",
"SymOff", "CallOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant", "CCop":
"SymOff", "CallOff", "SymValAndOff", "TypSize", "ARM64BitField", "FlagConstant", "CCop",
"PanicBoundsC", "PanicBoundsCC":
return true
}
return false
@ -1449,7 +1457,7 @@ func opHasAuxInt(op opData) bool {
func opHasAux(op opData) bool {
switch op.aux {
case "String", "Sym", "SymOff", "Call", "CallOff", "SymValAndOff", "Typ", "TypSize",
"S390XCCMask", "S390XRotateParams":
"S390XCCMask", "S390XRotateParams", "PanicBoundsC", "PanicBoundsCC":
return true
}
return false
@ -1804,6 +1812,10 @@ func (op opData) auxType() string {
return "s390x.CCMask"
case "S390XRotateParams":
return "s390x.RotateParams"
case "PanicBoundsC":
return "PanicBoundsC"
case "PanicBoundsCC":
return "PanicBoundsCC"
default:
return "invalid"
}
@ -1844,6 +1856,8 @@ func (op opData) auxIntType() string {
return "flagConstant"
case "ARM64BitField":
return "arm64BitField"
case "PanicBoundsC", "PanicBoundsCC":
return "int64"
default:
return "invalid"
}

View file

@ -56,19 +56,23 @@ func (s *biasedSparseMap) contains(x uint) bool {
return s.s.contains(ID(int(x) - s.first))
}
// get returns the value s maps for key x, or -1 if
// x is not mapped or is out of range for s.
func (s *biasedSparseMap) get(x uint) int32 {
// get returns the value s maps for key x and true, or
// 0/false if x is not mapped or is out of range for s.
func (s *biasedSparseMap) get(x uint) (int32, bool) {
if s == nil || s.s == nil {
return -1
return 0, false
}
if int(x) < s.first {
return -1
return 0, false
}
if int(x) >= s.cap() {
return -1
return 0, false
}
return s.s.get(ID(int(x) - s.first))
k := ID(int(x) - s.first)
if !s.s.contains(k) {
return 0, false
}
return s.s.get(k)
}
// getEntry returns the i'th key and value stored in s,

View file
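
The get signature change above swaps a -1 sentinel for Go's comma-ok idiom, so a stored zero no longer collides with "absent". The call-site shape (see the deadcode and dse hunks below), as a self-contained toy:

	// toy stands in for biasedSparseMap; only the idiom matters here.
	type toy struct{ m map[uint]int32 }

	func (t *toy) get(x uint) (int32, bool) {
		v, ok := t.m[x]
		return v, ok
	}

	func use(t *toy) {
		if v, ok := t.get(7); ok { // was: if v := t.get(7); v > -1
			_ = v
		}
	}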

@ -215,6 +215,9 @@ func checkFunc(f *Func) {
f.Fatalf("bad FlagConstant AuxInt value for %v", v)
}
canHaveAuxInt = true
case auxPanicBoundsC, auxPanicBoundsCC:
canHaveAux = true
canHaveAuxInt = true
default:
f.Fatalf("unknown aux type for %s", v.Op)
}

View file

@ -473,11 +473,11 @@ var passes = [...]pass{
{name: "expand calls", fn: expandCalls, required: true},
{name: "decompose builtin", fn: postExpandCallsDecompose, required: true},
{name: "softfloat", fn: softfloat, required: true},
{name: "branchelim", fn: branchelim},
{name: "late opt", fn: opt, required: true}, // TODO: split required rules and optimizing rules
{name: "dead auto elim", fn: elimDeadAutosGeneric},
{name: "sccp", fn: sccp},
{name: "generic deadcode", fn: deadcode, required: true}, // remove dead stores, which otherwise mess up store chain
{name: "branchelim", fn: branchelim},
{name: "late fuse", fn: fuseLate},
{name: "check bce", fn: checkbce},
{name: "dse", fn: dse},
@ -583,6 +583,10 @@ var passOrder = [...]constraint{
{"late fuse", "memcombine"},
// memcombine is an arch-independent pass.
{"memcombine", "lower"},
// late opt transforms some CondSelects into math.
{"branchelim", "late opt"},
// branchelim is an arch-independent pass.
{"branchelim", "lower"},
}
func init() {

View file

@ -291,6 +291,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
c.RegSize = 8
c.lowerBlock = rewriteBlockLOONG64
c.lowerValue = rewriteValueLOONG64
c.lateLowerBlock = rewriteBlockLOONG64latelower
c.lateLowerValue = rewriteValueLOONG64latelower
c.registers = registersLOONG64[:]
c.gpRegMask = gpRegMaskLOONG64
c.fpRegMask = fpRegMaskLOONG64
@ -570,6 +572,43 @@ func (c *Config) buildRecipes(arch string) {
return m.Block.NewValue2I(m.Pos, OpARM64SUBshiftLL, m.Type, int64(i), x, y)
})
}
case "loong64":
// - multiply is 4 cycles.
// - add/sub/shift are 1 cycle.
// On loong64, using a multiply also needs to load the constant into a register.
// TODO: figure out a happy medium.
mulCost = 45
// add
r(1, 1, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue2(m.Pos, OpLOONG64ADDV, m.Type, x, y)
})
// neg
r(-1, 0, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue1(m.Pos, OpLOONG64NEGV, m.Type, x)
})
// sub
r(1, -1, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue2(m.Pos, OpLOONG64SUBV, m.Type, x, y)
})
// regular shifts
for i := 1; i < 64; i++ {
c := 10
if i == 1 {
// Prefer x<<1 over x+x.
// Note that we eventually reverse this decision in LOONG64latelower.rules,
// but this makes shift combining rules in LOONG64.rules simpler.
c--
}
r(1<<i, 0, c,
func(m, x, y *Value) *Value {
return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
})
}
}
c.mulRecipes = map[int64]mulRecipe{}
@ -636,17 +675,58 @@ func (c *Config) buildRecipes(arch string) {
}
}
// Currently we only build 3-instruction linear combination recipes for loong64.
if arch == "loong64" {
// Three-instruction recipes.
// D: the first and second are both single-instruction recipes, and both feed the third's inputs.
// E: the first single-instruction recipe feeds the second, and the second feeds the third.
// D
for _, first := range linearCombos {
for _, second := range linearCombos {
for _, third := range linearCombos {
x := third.a*(first.a+first.b) + third.b*(second.a+second.b)
cost := first.cost + second.cost + third.cost
old := c.mulRecipes[x]
if (old.build == nil || cost < old.cost) && cost < mulCost {
c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
v1 := first.build(m, v, v)
v2 := second.build(m, v, v)
return third.build(m, v1, v2)
}}
}
}
}
}
// E
for _, first := range linearCombos {
for _, second := range linearCombos {
for _, third := range linearCombos {
x := third.a*(second.a*(first.a+first.b)+second.b) + third.b
cost := first.cost + second.cost + third.cost
old := c.mulRecipes[x]
if (old.build == nil || cost < old.cost) && cost < mulCost {
c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
v1 := first.build(m, v, v)
v2 := second.build(m, v1, v)
return third.build(m, v2, v)
}}
}
}
}
}
}
// These cases should be handled specially by rewrite rules.
// (Otherwise v * 1 == (neg (neg v)))
delete(c.mulRecipes, 0)
delete(c.mulRecipes, 1)
// Currently we assume that it doesn't help to do 3 linear
// combination instructions.
// Currently:
// len(c.mulRecipes) == 5984 on arm64
// 680 on amd64
// 5984 on loong64
// This function takes ~2.5ms on arm64.
//println(len(c.mulRecipes))
}

View file
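
A worked instance of pattern D from the hunk above: take first = v<<2 (contributing first.a+first.b = 4), second = v+v (contributing 2), and third = add, so x = 1*4 + 1*2 = 6 at cost 10+10+10 = 30, under mulCost = 45. The recipe that gives 6*v, written out by hand:

	// 6*v from three cheap ops instead of a multiply.
	func mul6(v int64) int64 {
		v1 := v << 2   // first recipe: 4*v
		v2 := v + v    // second recipe: 2*v
		return v1 + v2 // third combines them: 6*v
	}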

@ -257,7 +257,7 @@ func deadcode(f *Func) {
// Find new homes for lost lines -- require earliest in data flow with same line that is also in same block
for i := len(order) - 1; i >= 0; i-- {
w := order[i]
if j := pendingLines.get(w.Pos); j > -1 && f.Blocks[j] == w.Block {
if j, ok := pendingLines.get(w.Pos); ok && f.Blocks[j] == w.Block {
w.Pos = w.Pos.WithIsStmt()
pendingLines.remove(w.Pos)
}

View file

@ -118,7 +118,8 @@ func dse(f *Func) {
ptr = la
}
}
sr := shadowRange(shadowed.get(ptr.ID))
srNum, _ := shadowed.get(ptr.ID)
sr := shadowRange(srNum)
if sr.contains(off, off+sz) {
// Modify the store/zero into a copy of the memory state,
// effectively eliding the store operation.
@ -156,9 +157,7 @@ func dse(f *Func) {
// A shadowRange encodes a set of byte offsets [lo():hi()] from
// a given pointer that will be written to later in the block.
// A zero shadowRange encodes an empty shadowed range (and so
// does a -1 shadowRange, which is what sparsemap.get returns
// on a failed lookup).
// A zero shadowRange encodes an empty shadowed range.
type shadowRange int32
func (sr shadowRange) lo() int64 {

View file

@ -41,6 +41,9 @@ type FuncDebug struct {
RegOutputParams []*ir.Name
// Variable declarations that were removed during optimization
OptDcl []*ir.Name
// The ssa.Func.EntryID value, used to build location lists for
// return values promoted to heap in later DWARF generation.
EntryID ID
// Filled in by the user. Translates Block and Value ID to PC.
//
@ -1645,13 +1648,13 @@ func readPtr(ctxt *obj.Link, buf []byte) uint64 {
}
// setupLocList creates the initial portion of a location list for a
// SetupLocList creates the initial portion of a location list for a
// user variable. It emits the encoded start/end of the range and a
// placeholder for the size. Return value is the new list plus the
// slot in the list holding the size (to be updated later).
func setupLocList(ctxt *obj.Link, f *Func, list []byte, st, en ID) ([]byte, int) {
start, startOK := encodeValue(ctxt, f.Entry.ID, st)
end, endOK := encodeValue(ctxt, f.Entry.ID, en)
func SetupLocList(ctxt *obj.Link, entryID ID, list []byte, st, en ID) ([]byte, int) {
start, startOK := encodeValue(ctxt, entryID, st)
end, endOK := encodeValue(ctxt, entryID, en)
if !startOK || !endOK {
// This could happen if someone writes a function that uses
// >65K values on a 32-bit platform. Hopefully a degraded debugging
@ -1800,7 +1803,6 @@ func isNamedRegParam(p abi.ABIParamAssignment) bool {
// appropriate for the ".closureptr" compiler-synthesized variable
// needed by the debugger for range func bodies.
func BuildFuncDebugNoOptimized(ctxt *obj.Link, f *Func, loggingEnabled bool, stackOffset func(LocalSlot) int32, rval *FuncDebug) {
needCloCtx := f.CloSlot != nil
pri := f.ABISelf.ABIAnalyzeFuncType(f.Type)
@ -1911,7 +1913,7 @@ func BuildFuncDebugNoOptimized(ctxt *obj.Link, f *Func, loggingEnabled bool, sta
// Param is arriving in one or more registers. We need a 2-element
// location expression for it. First entry in location list
// will correspond to lifetime in input registers.
list, sizeIdx := setupLocList(ctxt, f, rval.LocationLists[pidx],
list, sizeIdx := SetupLocList(ctxt, f.Entry.ID, rval.LocationLists[pidx],
BlockStart.ID, afterPrologVal)
if list == nil {
pidx++
@ -1961,7 +1963,7 @@ func BuildFuncDebugNoOptimized(ctxt *obj.Link, f *Func, loggingEnabled bool, sta
// Second entry in the location list will be the stack home
// of the param, once it has been spilled. Emit that now.
list, sizeIdx = setupLocList(ctxt, f, list,
list, sizeIdx = SetupLocList(ctxt, f.Entry.ID, list,
afterPrologVal, FuncEnd.ID)
if list == nil {
pidx++

View file

@ -115,6 +115,34 @@ func TestDebugLines_53456(t *testing.T) {
testDebugLinesDefault(t, "-N -l", "b53456.go", "(*T).Inc", []int{15, 16, 17, 18}, true)
}
func TestDebugLines_74576(t *testing.T) {
unixOnly(t)
switch testGoArch() {
default:
// This failed on linux/riscv64 (issue 74669); conservatively
// skip many architectures, like several other tests here.
t.Skip("skipped for many architectures")
case "arm64", "amd64", "loong64":
tests := []struct {
file string
wantStmts []int
}{
{"i74576a.go", []int{12, 13, 13, 14}},
{"i74576b.go", []int{12, 13, 13, 14}},
{"i74576c.go", []int{12, 13, 13, 14}},
}
t.Parallel()
for _, test := range tests {
t.Run(test.file, func(t *testing.T) {
t.Parallel()
testDebugLines(t, "-N -l", test.file, "main", test.wantStmts, false)
})
}
}
}
func compileAndDump(t *testing.T, file, function, moreGCFlags string) []byte {
testenv.MustHaveGoBuild(t)
@ -223,6 +251,9 @@ func testInlineStack(t *testing.T, file, function string, wantStacks [][]int) {
// then verifies that the statement-marked lines in that file are the same as those in wantStmts
// These files must all be short because this is super-fragile.
// "go build" is run in a temporary directory that is normally deleted, unless -test.v
//
// TODO: the tests calling this are somewhat expensive; perhaps more tests can be marked t.Parallel,
// or perhaps the mechanism here can be made more efficient.
func testDebugLines(t *testing.T, gcflags, file, function string, wantStmts []int, ignoreRepeats bool) {
dumpBytes := compileAndDump(t, file, function, gcflags)
dump := bufio.NewScanner(bytes.NewReader(dumpBytes))

View file

@ -243,11 +243,8 @@ func (x *expandState) rewriteFuncResults(v *Value, b *Block, aux *AuxCall) {
if len(aRegs) > 0 {
result = &allResults
} else {
if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr {
addr := a.Args[0]
if addr.MemoryArg() == a.MemoryArg() && addr.Aux == aux.NameOfResult(i) {
continue // Self move to output parameter
}
if a.Op == OpLoad && a.Args[0].Op == OpLocalAddr && a.Args[0].Aux == aux.NameOfResult(i) {
continue // Self move to output parameter
}
}
rc.init(aRegs, aux.abiInfo, result, auxBase, auxOffset)

View file

@ -12,18 +12,15 @@ type loop struct {
header *Block // The header node of this (reducible) loop
outer *loop // loop containing this loop
// By default, children, exits, and depth are not initialized.
children []*loop // loops nested directly within this loop. Initialized by assembleChildren().
exits []*Block // exits records blocks reached by exits from this loop. Initialized by findExits().
// Next three fields used by regalloc and/or
// aid in computation of inner-ness and list of blocks.
nBlocks int32 // Number of blocks in this loop but not within inner loops
depth int16 // Nesting depth of the loop; 1 is outermost. Initialized by calculateDepths().
depth int16 // Nesting depth of the loop; 1 is outermost.
isInner bool // True if never discovered to contain a loop
// register allocation uses this.
containsUnavoidableCall bool // True if all paths through the loop have a call
// True if all paths through the loop have a call.
// Computed and used by regalloc; stored here for convenience.
containsUnavoidableCall bool
}
// outerinner records that outer contains inner
@ -49,18 +46,6 @@ func (sdom SparseTree) outerinner(outer, inner *loop) {
outer.isInner = false
}
func checkContainsCall(bb *Block) bool {
if bb.Kind == BlockDefer {
return true
}
for _, v := range bb.Values {
if opcodeTable[v.Op].call {
return true
}
}
return false
}
type loopnest struct {
f *Func
b2l []*loop
@ -68,9 +53,6 @@ type loopnest struct {
sdom SparseTree
loops []*loop
hasIrreducible bool // TODO current treatment of irreducible loops is very flaky, if accurate loops are needed, must punt at function level.
// Record which of the lazily initialized fields have actually been initialized.
initializedChildren, initializedDepth, initializedExits bool
}
const (
@ -355,91 +337,59 @@ func loopnestfor(f *Func) *loopnest {
visited[b.ID] = true
}
ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
// Calculate containsUnavoidableCall for regalloc
dominatedByCall := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(dominatedByCall)
for _, b := range po {
if checkContainsCall(b) {
dominatedByCall[b.ID] = true
}
}
// Run DFS to find a path through the loop that avoids all calls.
// Such a path either escapes the loop or returns to the header.
// It isn't enough to have an exit not dominated by any call, for example:
//   ... some loop
//   call1   call2
//      \     /
//       exit
//   ...
// exit is not dominated by any call, but there is no call-free path to it.
// Compute depths.
for _, l := range loops {
// Header contains call.
if dominatedByCall[l.header.ID] {
l.containsUnavoidableCall = true
if l.depth != 0 {
// Already computed because it is an ancestor of
// a previous loop.
continue
}
callfreepath := false
tovisit := make([]*Block, 0, len(l.header.Succs))
// Push all non-loop non-exit successors of header onto toVisit.
for _, s := range l.header.Succs {
nb := s.Block()
// This corresponds to loop with zero iterations.
if !l.iterationEnd(nb, b2l) {
tovisit = append(tovisit, nb)
}
}
for len(tovisit) > 0 {
cur := tovisit[len(tovisit)-1]
tovisit = tovisit[:len(tovisit)-1]
if dominatedByCall[cur.ID] {
continue
}
// Record visited in dominatedByCall.
dominatedByCall[cur.ID] = true
for _, s := range cur.Succs {
nb := s.Block()
if l.iterationEnd(nb, b2l) {
callfreepath = true
}
if !dominatedByCall[nb.ID] {
tovisit = append(tovisit, nb)
}
}
if callfreepath {
// Find depth by walking up the loop tree.
d := int16(0)
for x := l; x != nil; x = x.outer {
if x.depth != 0 {
d += x.depth
break
}
d++
}
if !callfreepath {
l.containsUnavoidableCall = true
// Set depth for every ancestor.
for x := l; x != nil; x = x.outer {
if x.depth != 0 {
break
}
x.depth = d
d--
}
}
// Double-check depths.
for _, l := range loops {
want := int16(1)
if l.outer != nil {
want = l.outer.depth + 1
}
if l.depth != want {
l.header.Fatalf("bad depth calculation for loop %s: got %d want %d", l.header, l.depth, want)
}
}
ln := &loopnest{f: f, b2l: b2l, po: po, sdom: sdom, loops: loops, hasIrreducible: sawIrred}
// Curious about the loopiness? "-d=ssa/likelyadjust/stats"
if f.pass != nil && f.pass.stats > 0 && len(loops) > 0 {
ln.assembleChildren()
ln.calculateDepths()
ln.findExits()
// Note stats for non-innermost loops are slightly flawed because
// they don't account for inner loop exits that span multiple levels.
for _, l := range loops {
x := len(l.exits)
cf := 0
if !l.containsUnavoidableCall {
cf = 1
}
inner := 0
if l.isInner {
inner++
}
f.LogStat("loopstats:",
l.depth, "depth", x, "exits",
inner, "is_inner", cf, "always_calls", l.nBlocks, "n_blocks")
f.LogStat("loopstats in "+f.Name+":",
l.depth, "depth",
inner, "is_inner", l.nBlocks, "n_blocks")
}
}
@ -465,62 +415,6 @@ func loopnestfor(f *Func) *loopnest {
return ln
}
// assembleChildren initializes the children field of each
// loop in the nest. Loop A is a child of loop B if A is
// directly nested within B (based on the reducible-loops
// detection above)
func (ln *loopnest) assembleChildren() {
if ln.initializedChildren {
return
}
for _, l := range ln.loops {
if l.outer != nil {
l.outer.children = append(l.outer.children, l)
}
}
ln.initializedChildren = true
}
// calculateDepths uses the children field of loops
// to determine the nesting depth (outer=1) of each
// loop. This is helpful for finding exit edges.
func (ln *loopnest) calculateDepths() {
if ln.initializedDepth {
return
}
ln.assembleChildren()
for _, l := range ln.loops {
if l.outer == nil {
l.setDepth(1)
}
}
ln.initializedDepth = true
}
// findExits uses loop depth information to find the
// exits from a loop.
func (ln *loopnest) findExits() {
if ln.initializedExits {
return
}
ln.calculateDepths()
b2l := ln.b2l
for _, b := range ln.po {
l := b2l[b.ID]
if l != nil && len(b.Succs) == 2 {
sl := b2l[b.Succs[0].b.ID]
if recordIfExit(l, sl, b.Succs[0].b) {
continue
}
sl = b2l[b.Succs[1].b.ID]
if recordIfExit(l, sl, b.Succs[1].b) {
continue
}
}
}
ln.initializedExits = true
}
// depth returns the loop nesting level of block b.
func (ln *loopnest) depth(b ID) int16 {
if l := ln.b2l[b]; l != nil {
@ -528,39 +422,3 @@ func (ln *loopnest) depth(b ID) int16 {
}
return 0
}
// recordIfExit checks sl (the loop containing b) to see if it
// is outside of loop l, and if so, records b as an exit block
// from l and returns true.
func recordIfExit(l, sl *loop, b *Block) bool {
if sl != l {
if sl == nil || sl.depth <= l.depth {
l.exits = append(l.exits, b)
return true
}
// sl is not nil, and is deeper than l.
// It's possible for this to be a goto into an irreducible loop made from gotos.
for sl.depth > l.depth {
sl = sl.outer
}
if sl != l {
l.exits = append(l.exits, b)
return true
}
}
return false
}
func (l *loop) setDepth(d int16) {
l.depth = d
for _, c := range l.children {
c.setDepth(d + 1)
}
}
// iterationEnd checks if block b ends iteration of loop l.
// Ending iteration means either escaping to outer loop/code or
// going back to the header.
func (l *loop) iterationEnd(b *Block, b2l []*loop) bool {
return b == l.header || b2l[b.ID] == nil || (b2l[b.ID] != l && b2l[b.ID].depth <= l.depth)
}
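// For example, for an inner loop I with header Ih: a successor equal to Ih
// ends the iteration (the back edge), as does a successor outside any loop
// (b2l[b.ID] == nil) or in a different loop that is no deeper than I.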


@ -37,19 +37,20 @@ type indVar struct {
// - the minimum bound
// - the increment value
// - the "next" value (SSA value that is Phi'd into the induction variable every loop)
// - the header's edge returning from the body
//
// Currently, we detect induction variables that match (Phi min nxt),
// with nxt being (Add inc ind).
// If it can't parse the induction variable correctly, it returns nil for min, inc, and nxt.
func parseIndVar(ind *Value) (min, inc, nxt *Value) {
func parseIndVar(ind *Value) (min, inc, nxt *Value, loopReturn Edge) {
if ind.Op != OpPhi {
return
}
if n := ind.Args[0]; (n.Op == OpAdd64 || n.Op == OpAdd32 || n.Op == OpAdd16 || n.Op == OpAdd8) && (n.Args[0] == ind || n.Args[1] == ind) {
min, nxt = ind.Args[1], n
min, nxt, loopReturn = ind.Args[1], n, ind.Block.Preds[0]
} else if n := ind.Args[1]; (n.Op == OpAdd64 || n.Op == OpAdd32 || n.Op == OpAdd16 || n.Op == OpAdd8) && (n.Args[0] == ind || n.Args[1] == ind) {
min, nxt = ind.Args[0], n
min, nxt, loopReturn = ind.Args[0], n, ind.Block.Preds[1]
} else {
// Not a recognized induction variable.
return
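
For intuition, a minimal source-level sketch (an editorial illustration, not part of this change) of the loop shape whose induction variable parseIndVar recognizes; i is the Phi of the entry value and the Add that advances it, and loopReturn is the Phi's predecessor edge carrying that Add back to the header:

func sumTo(limit int) int {
	s := 0
	for i := 0; i < limit; i++ { // SSA: i = Phi(0, nxt), nxt = Add(i, 1)
		s += i
	}
	return s
}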
@ -111,13 +112,13 @@ func findIndVar(f *Func) []indVar {
// See if this is really an induction variable
less := true
init, inc, nxt := parseIndVar(ind)
init, inc, nxt, loopReturn := parseIndVar(ind)
if init == nil {
// We failed to parse the induction variable. Before punting, we want to check
// whether the control op was written with the induction variable on the RHS
// instead of the LHS. This happens for the downwards case, like:
// for i := len(n)-1; i >= 0; i--
init, inc, nxt = parseIndVar(limit)
init, inc, nxt, loopReturn = parseIndVar(limit)
if init == nil {
// No recognized induction variable on either operand
continue
@ -145,6 +146,20 @@ func findIndVar(f *Func) []indVar {
continue
}
// startBody is the edge that eventually returns to the loop header.
var startBody Edge
switch {
case sdom.IsAncestorEq(b.Succs[0].b, loopReturn.b):
startBody = b.Succs[0]
case sdom.IsAncestorEq(b.Succs[1].b, loopReturn.b):
// if x { goto exit } else { goto entry } is identical to if !x { goto entry } else { goto exit }
startBody = b.Succs[1]
less = !less
inclusive = !inclusive
default:
continue
}
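// Illustration (not in the original source): the two header shapes this
// switch normalizes. A front end may emit either
//	if i <  n { goto body } else { goto exit }   // startBody = b.Succs[0]
// or the negated form with swapped successors,
//	if i >= n { goto exit } else { goto body }   // startBody = b.Succs[1]
// in which case less and inclusive are flipped so later checks see one form.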
// Increment sign must match comparison direction.
// When incrementing, the termination comparison must be ind </<= limit.
// When decrementing, the termination comparison must be ind >/>= limit.
@ -172,14 +187,14 @@ func findIndVar(f *Func) []indVar {
// First condition: loop entry has a single predecessor, which
// is the header block. This implies that b.Succs[0] is
// reached iff ind < limit.
if len(b.Succs[0].b.Preds) != 1 {
// b.Succs[1] must exit the loop.
if len(startBody.b.Preds) != 1 {
// The other successor must exit the loop.
continue
}
// Second condition: b.Succs[0] dominates nxt so that
// Second condition: startBody.b dominates nxt so that
// nxt is computed when inc < limit.
if !sdom.IsAncestorEq(b.Succs[0].b, nxt.Block) {
if !sdom.IsAncestorEq(startBody.b, nxt.Block) {
// inc+ind can only be reached through the branch that enters the loop.
continue
}
@ -298,7 +313,7 @@ func findIndVar(f *Func) []indVar {
nxt: nxt,
min: min,
max: max,
entry: b.Succs[0].b,
entry: startBody.b,
flags: flags,
})
b.Logf("found induction variable %v (inc = %v, min = %v, max = %v)\n", ind, inc, min, max)


@ -4,6 +4,10 @@
package ssa
import (
"slices"
)
// loopRotate converts loops with a check-loop-condition-at-beginning
// to loops with a check-loop-condition-at-end.
// This helps loops avoid the extra unconditional jump at the end of each iteration.
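A hedged sketch of the effect in terms of block layout (illustrative only; the CFG is unchanged, only the order of blocks, which removes one jump per iteration):

// Before rotation, layout is header-first; each iteration executes a
// conditional branch at the top plus an unconditional jump back:
//
//	header: if !cond { goto exit }
//	body:   ...
//	        goto header
//	exit:
//
// After rotation the body precedes the header, so the conditional branch
// sits at the bottom and iterating needs only that one jump:
//
//	        goto header   // entry jumps into the check once
//	body:   ...
//	header: if cond { goto body }
//	exit: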
@ -41,10 +45,64 @@ func loopRotate(f *Func) {
// Map from block ID to the moving blocks that should
// come right after it.
// If a block, which has its ID present in keys of the 'after' map,
// occurs in some other block's 'after' list, that represents a whole
// nested loop. For example, consider an inner loop I nested inside an
// outer loop O: It and Ot are the corresponding top blocks chosen for
// these loops by our algorithm, and It is in Ot's 'after' list.
//
// Before:                      After:
//
//     e                           e
//     │                           │
//     │                           │ Ot ◄─────┐
//     ▼                           ▼ ▼        │
//  ┌───Oh ◄────┐                ┌─┬─Oh       │
//  │   │       │                │ │          │
//  │   │       │                │ │ It ◄───┐ │
//  │   ▼       │                │ │ ▼      │ │
//  │ ┌─Ih ◄──┐ │                │ └►Ih     │ │
//  │ │ │     │ │                │ ┌──┤     │ │
//  │ │ ▼     │ │                │ │  ▼     │ │
//  │ │ Ib    │ │                │ │  Ib    │ │
//  │ │ └─►It─┘ │                │ │  └─────┘ │
//  │ │         │                │ │          │
//  │ └►Ie      │                │ └─►Ie      │
//  │   └─►Ot───┘                │    └───────┘
//  │                            │
//  └──►Oe                       └──►Oe
//
// We build the 'after' lists for each of the top blocks Ot and It:
// after[Ot]: Oh, It, Ie
// after[It]: Ih, Ib
after := map[ID][]*Block{}
// Map from loop header ID to the new top block for the loop.
tops := map[ID]*Block{}
// Order loops to rotate any child loop before adding its top block
// to the parent loop's 'after' list.
loopOrder := f.Cache.allocIntSlice(len(loopnest.loops))
for i := range loopOrder {
loopOrder[i] = i
}
defer f.Cache.freeIntSlice(loopOrder)
slices.SortFunc(loopOrder, func(i, j int) int {
di := loopnest.loops[i].depth
dj := loopnest.loops[j].depth
switch {
case di > dj:
return -1
case di < dj:
return 1
default:
return 0
}
})
// Check each loop header and decide if we want to move it.
for _, loop := range loopnest.loops {
for _, loopIdx := range loopOrder {
loop := loopnest.loops[loopIdx]
b := loop.header
var p *Block // b's in-loop predecessor
for _, e := range b.Preds {
@ -59,6 +117,7 @@ func loopRotate(f *Func) {
if p == nil {
continue
}
tops[loop.header.ID] = p
p.Hotness |= HotInitial
if f.IsPgoHot {
p.Hotness |= HotPgo
@ -80,8 +139,10 @@ func loopRotate(f *Func) {
if nextb == p { // original loop predecessor is next
break
}
if loopnest.b2l[nextb.ID] == loop {
after[p.ID] = append(after[p.ID], nextb)
if bloop := loopnest.b2l[nextb.ID]; bloop != nil {
if bloop == loop || bloop.outer == loop && tops[bloop.header.ID] == nextb {
after[p.ID] = append(after[p.ID], nextb)
}
}
b = nextb
}
@ -90,7 +151,7 @@ func loopRotate(f *Func) {
f.Blocks[idToIdx[p.ID]] = loop.header
idToIdx[loop.header.ID], idToIdx[p.ID] = idToIdx[p.ID], idToIdx[loop.header.ID]
// Place b after p.
// Place loop blocks after p.
for _, b := range after[p.ID] {
move[b.ID] = struct{}{}
}
@ -107,16 +168,23 @@ func loopRotate(f *Func) {
oldOrder := f.Cache.allocBlockSlice(len(f.Blocks))
defer f.Cache.freeBlockSlice(oldOrder)
copy(oldOrder, f.Blocks)
var moveBlocks func(bs []*Block)
moveBlocks = func(blocks []*Block) {
for _, a := range blocks {
f.Blocks[j] = a
j++
if nextBlocks, ok := after[a.ID]; ok {
moveBlocks(nextBlocks)
}
}
}
for _, b := range oldOrder {
if _, ok := move[b.ID]; ok {
continue
}
f.Blocks[j] = b
j++
for _, a := range after[b.ID] {
f.Blocks[j] = a
j++
}
moveBlocks(after[b.ID])
}
if j != len(oldOrder) {
f.Fatalf("bad reordering in looprotate")


@ -0,0 +1,65 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package ssa
import (
"cmd/compile/internal/types"
"testing"
)
func TestLoopRotateNested(t *testing.T) {
c := testConfig(t)
fun := c.Fun("entry",
Bloc("entry",
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
Valu("constTrue", OpConstBool, types.Types[types.TBOOL], 1, nil),
Goto("outerHeader")),
Bloc("outerHeader",
If("constTrue", "outerBody", "outerExit")),
Bloc("outerBody",
Goto("innerHeader")),
Bloc("innerHeader",
If("constTrue", "innerBody", "innerExit")),
Bloc("innerBody",
Goto("innerTop")),
Bloc("innerTop",
Goto("innerHeader")),
Bloc("innerExit",
Goto("outerTop")),
Bloc("outerTop",
Goto("outerHeader")),
Bloc("outerExit",
Exit("mem")))
blockName := make([]string, len(fun.f.Blocks)+1)
for name, block := range fun.blocks {
blockName[block.ID] = name
}
CheckFunc(fun.f)
loopRotate(fun.f)
CheckFunc(fun.f)
// Verify the resulting block order
expected := []string{
"entry",
"outerTop",
"outerHeader",
"outerBody",
"innerTop",
"innerHeader",
"innerBody",
"innerExit",
"outerExit",
}
if len(expected) != len(fun.f.Blocks) {
t.Fatalf("expected %d blocks, found %d", len(expected), len(fun.f.Blocks))
}
for i, b := range fun.f.Blocks {
if expected[i] != blockName[b.ID] {
t.Errorf("position %d: expected %s, found %s", i, expected[i], blockName[b.ID])
}
}
}


@ -221,7 +221,8 @@ func nilcheckelim2(f *Func) {
// Iteration order means that first nilcheck in the chain wins, others
// are bumped into the ordinary statement preservation algorithm.
u := b.Values[unnecessary.get(v.Args[0].ID)]
uid, _ := unnecessary.get(v.Args[0].ID)
u := b.Values[uid]
if !u.Type.IsMemory() && !u.Pos.SameFileAndLine(v.Pos) {
if u.Pos.IsStmt() == src.PosIsStmt {
pendingLines.add(u.Pos)


@ -6,10 +6,12 @@ package ssa
import (
"cmd/compile/internal/abi"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
"cmd/compile/internal/types"
"cmd/internal/obj"
"fmt"
rtabi "internal/abi"
"strings"
)
@ -68,6 +70,10 @@ type regInfo struct {
// clobbers encodes the set of registers that are overwritten by
// the instruction (other than the output registers).
clobbers regMask
// Instruction clobbers the register containing input 0.
clobbersArg0 bool
// Instruction clobbers the register containing input 1.
clobbersArg1 bool
// outputs is the same as inputs, but for the outputs of the instruction.
outputs []outputInfo
}
@ -365,6 +371,9 @@ const (
auxCall // aux is a *ssa.AuxCall
auxCallOff // aux is a *ssa.AuxCall, AuxInt is int64 param (in+out) size
auxPanicBoundsC // constant for a bounds failure
auxPanicBoundsCC // two constants for a bounds failure
// architecture specific aux types
auxARM64BitField // aux is an arm64 bitfield lsb and width packed into auxInt
auxS390XRotateParams // aux is a s390x rotate parameters object encoding start bit, end bit and rotate amount
@ -523,6 +532,50 @@ func boundsABI(b int64) int {
}
}
// Returns the bounds error code needed by the runtime, and
// whether the x field is signed.
func (b BoundsKind) Code() (rtabi.BoundsErrorCode, bool) {
switch b {
case BoundsIndex:
return rtabi.BoundsIndex, true
case BoundsIndexU:
return rtabi.BoundsIndex, false
case BoundsSliceAlen:
return rtabi.BoundsSliceAlen, true
case BoundsSliceAlenU:
return rtabi.BoundsSliceAlen, false
case BoundsSliceAcap:
return rtabi.BoundsSliceAcap, true
case BoundsSliceAcapU:
return rtabi.BoundsSliceAcap, false
case BoundsSliceB:
return rtabi.BoundsSliceB, true
case BoundsSliceBU:
return rtabi.BoundsSliceB, false
case BoundsSlice3Alen:
return rtabi.BoundsSlice3Alen, true
case BoundsSlice3AlenU:
return rtabi.BoundsSlice3Alen, false
case BoundsSlice3Acap:
return rtabi.BoundsSlice3Acap, true
case BoundsSlice3AcapU:
return rtabi.BoundsSlice3Acap, false
case BoundsSlice3B:
return rtabi.BoundsSlice3B, true
case BoundsSlice3BU:
return rtabi.BoundsSlice3B, false
case BoundsSlice3C:
return rtabi.BoundsSlice3C, true
case BoundsSlice3CU:
return rtabi.BoundsSlice3C, false
case BoundsConvert:
return rtabi.BoundsConvert, false
default:
base.Fatalf("bad bounds kind %d", b)
return 0, false
}
}
// arm64BitField is the GO type of ARM64BitField auxInt.
// if x is an ARM64BitField, then width=x&0xff, lsb=(x>>8)&0xff, and
// width+lsb<64 for 64-bit variant, width+lsb<32 for 32-bit variant.


@ -569,12 +569,12 @@ const (
Op386LoweredGetCallerSP
Op386LoweredNilCheck
Op386LoweredWB
Op386LoweredPanicBoundsA
Op386LoweredPanicBoundsB
Op386LoweredPanicBoundsC
Op386LoweredPanicExtendA
Op386LoweredPanicExtendB
Op386LoweredPanicExtendC
Op386LoweredPanicBoundsRR
Op386LoweredPanicBoundsRC
Op386LoweredPanicBoundsCR
Op386LoweredPanicBoundsCC
Op386LoweredPanicExtendRR
Op386LoweredPanicExtendRC
Op386FlagEQ
Op386FlagLT_ULT
Op386FlagLT_UGT
@ -1051,7 +1051,8 @@ const (
OpAMD64MOVLstoreconstidx4
OpAMD64MOVQstoreconstidx1
OpAMD64MOVQstoreconstidx8
OpAMD64DUFFZERO
OpAMD64LoweredZero
OpAMD64LoweredZeroLoop
OpAMD64REPSTOSQ
OpAMD64CALLstatic
OpAMD64CALLtail
@ -1067,9 +1068,10 @@ const (
OpAMD64LoweredNilCheck
OpAMD64LoweredWB
OpAMD64LoweredHasCPUFeature
OpAMD64LoweredPanicBoundsA
OpAMD64LoweredPanicBoundsB
OpAMD64LoweredPanicBoundsC
OpAMD64LoweredPanicBoundsRR
OpAMD64LoweredPanicBoundsRC
OpAMD64LoweredPanicBoundsCR
OpAMD64LoweredPanicBoundsCC
OpAMD64FlagEQ
OpAMD64FlagLT_ULT
OpAMD64FlagLT_UGT
@ -2478,12 +2480,12 @@ const (
OpARMLoweredGetClosurePtr
OpARMLoweredGetCallerSP
OpARMLoweredGetCallerPC
OpARMLoweredPanicBoundsA
OpARMLoweredPanicBoundsB
OpARMLoweredPanicBoundsC
OpARMLoweredPanicExtendA
OpARMLoweredPanicExtendB
OpARMLoweredPanicExtendC
OpARMLoweredPanicBoundsRR
OpARMLoweredPanicBoundsRC
OpARMLoweredPanicBoundsCR
OpARMLoweredPanicBoundsCC
OpARMLoweredPanicExtendRR
OpARMLoweredPanicExtendRC
OpARMFlagConstant
OpARMInvertFlags
OpARMLoweredWB
@ -2814,9 +2816,10 @@ const (
OpARM64LoweredAtomicAnd32Variant
OpARM64LoweredAtomicOr32Variant
OpARM64LoweredWB
OpARM64LoweredPanicBoundsA
OpARM64LoweredPanicBoundsB
OpARM64LoweredPanicBoundsC
OpARM64LoweredPanicBoundsRR
OpARM64LoweredPanicBoundsRC
OpARM64LoweredPanicBoundsCR
OpARM64LoweredPanicBoundsCC
OpARM64PRFM
OpARM64DMB
OpARM64ZERO
@ -3018,9 +3021,10 @@ const (
OpLOONG64LoweredGetCallerPC
OpLOONG64LoweredWB
OpLOONG64LoweredPubBarrier
OpLOONG64LoweredPanicBoundsA
OpLOONG64LoweredPanicBoundsB
OpLOONG64LoweredPanicBoundsC
OpLOONG64LoweredPanicBoundsRR
OpLOONG64LoweredPanicBoundsRC
OpLOONG64LoweredPanicBoundsCR
OpLOONG64LoweredPanicBoundsCC
OpLOONG64PRELD
OpLOONG64PRELDX
@ -3134,12 +3138,12 @@ const (
OpMIPSLoweredGetCallerPC
OpMIPSLoweredWB
OpMIPSLoweredPubBarrier
OpMIPSLoweredPanicBoundsA
OpMIPSLoweredPanicBoundsB
OpMIPSLoweredPanicBoundsC
OpMIPSLoweredPanicExtendA
OpMIPSLoweredPanicExtendB
OpMIPSLoweredPanicExtendC
OpMIPSLoweredPanicBoundsRR
OpMIPSLoweredPanicBoundsRC
OpMIPSLoweredPanicBoundsCR
OpMIPSLoweredPanicBoundsCC
OpMIPSLoweredPanicExtendRR
OpMIPSLoweredPanicExtendRC
OpMIPS64ADDV
OpMIPS64ADDVconst
@ -3266,9 +3270,10 @@ const (
OpMIPS64LoweredGetCallerPC
OpMIPS64LoweredWB
OpMIPS64LoweredPubBarrier
OpMIPS64LoweredPanicBoundsA
OpMIPS64LoweredPanicBoundsB
OpMIPS64LoweredPanicBoundsC
OpMIPS64LoweredPanicBoundsRR
OpMIPS64LoweredPanicBoundsRC
OpMIPS64LoweredPanicBoundsCR
OpMIPS64LoweredPanicBoundsCC
OpPPC64ADD
OpPPC64ADDCC
@ -3637,9 +3642,10 @@ const (
OpRISCV64LoweredGetCallerPC
OpRISCV64LoweredWB
OpRISCV64LoweredPubBarrier
OpRISCV64LoweredPanicBoundsA
OpRISCV64LoweredPanicBoundsB
OpRISCV64LoweredPanicBoundsC
OpRISCV64LoweredPanicBoundsRR
OpRISCV64LoweredPanicBoundsRC
OpRISCV64LoweredPanicBoundsCR
OpRISCV64LoweredPanicBoundsCC
OpRISCV64FADDS
OpRISCV64FSUBS
OpRISCV64FMULS
@ -3708,6 +3714,10 @@ const (
OpS390XLPDFR
OpS390XLNDFR
OpS390XCPSDR
OpS390XWFMAXDB
OpS390XWFMAXSB
OpS390XWFMINDB
OpS390XWFMINSB
OpS390XFIDBR
OpS390XFMOVSload
OpS390XFMOVDload
@ -3890,9 +3900,10 @@ const (
OpS390XLoweredRound32F
OpS390XLoweredRound64F
OpS390XLoweredWB
OpS390XLoweredPanicBoundsA
OpS390XLoweredPanicBoundsB
OpS390XLoweredPanicBoundsC
OpS390XLoweredPanicBoundsRR
OpS390XLoweredPanicBoundsRC
OpS390XLoweredPanicBoundsCR
OpS390XLoweredPanicBoundsCC
OpS390XFlagEQ
OpS390XFlagLT
OpS390XFlagGT
@ -9499,77 +9510,68 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // DX
{1, 8}, // BX
{0, 239}, // AX CX DX BX BP SI DI
{1, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "LoweredPanicBoundsB",
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "LoweredPanicExtendRR",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 15}, // AX CX DX BX
{1, 15}, // AX CX DX BX
{2, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "LoweredPanicExtendRC",
auxType: auxPanicBoundsC,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // CX
{1, 4}, // DX
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // AX
{1, 2}, // CX
},
},
},
{
name: "LoweredPanicExtendA",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 64}, // SI
{1, 4}, // DX
{2, 8}, // BX
},
},
},
{
name: "LoweredPanicExtendB",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 64}, // SI
{1, 2}, // CX
{2, 4}, // DX
},
},
},
{
name: "LoweredPanicExtendC",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 64}, // SI
{1, 1}, // AX
{2, 2}, // CX
{0, 15}, // AX CX DX BX
{1, 15}, // AX CX DX BX
},
},
},
@ -16796,15 +16798,28 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "DUFFZERO",
auxType: auxInt64,
argLen: 2,
unsafePoint: true,
name: "LoweredZero",
auxType: auxInt64,
argLen: 2,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
{0, 128}, // DI
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbers: 128, // DI
},
},
{
name: "LoweredZeroLoop",
auxType: auxInt64,
argLen: 2,
clobberFlags: true,
needIntTemp: true,
faultOnNilArg0: true,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
clobbersArg0: true,
},
},
{
@ -16977,41 +16992,46 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // DX
{1, 8}, // BX
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // CX
{1, 4}, // DX
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // AX
{1, 2}, // CX
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "FlagEQ",
argLen: 0,
@ -37876,77 +37896,68 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
{0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
{1, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
},
},
{
name: "LoweredPanicBoundsB",
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
},
},
{
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "LoweredPanicExtendRR",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 15}, // R0 R1 R2 R3
{1, 15}, // R0 R1 R2 R3
{2, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "LoweredPanicExtendRC",
auxType: auxPanicBoundsC,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // R0
{1, 2}, // R1
},
},
},
{
name: "LoweredPanicExtendA",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 16}, // R4
{1, 4}, // R2
{2, 8}, // R3
},
},
},
{
name: "LoweredPanicExtendB",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 16}, // R4
{1, 2}, // R1
{2, 4}, // R2
},
},
},
{
name: "LoweredPanicExtendC",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 16}, // R4
{1, 1}, // R0
{2, 2}, // R1
{0, 15}, // R0 R1 R2 R3
{1, 15}, // R0 R1 R2 R3
},
},
},
@ -42441,41 +42452,46 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
{1, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // R0
{1, 2}, // R1
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "PRFM",
auxType: auxInt64,
@ -45208,41 +45224,46 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4194304}, // R23
{1, 8388608}, // R24
{0, 524280}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19
{1, 524280}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1048576}, // R21
{1, 4194304}, // R23
{0, 524280}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 524288}, // R20
{1, 1048576}, // R21
{0, 524280}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "PRELD",
auxType: auxInt64,
@ -46756,77 +46777,68 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 8}, // R3
{1, 16}, // R4
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
{1, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsB",
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "LoweredPanicExtendRR",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 30}, // R1 R2 R3 R4
{1, 30}, // R1 R2 R3 R4
{2, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicExtendRC",
auxType: auxPanicBoundsC,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
},
},
},
{
name: "LoweredPanicExtendA",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 32}, // R5
{1, 8}, // R3
{2, 16}, // R4
},
},
},
{
name: "LoweredPanicExtendB",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 32}, // R5
{1, 4}, // R2
{2, 8}, // R3
},
},
},
{
name: "LoweredPanicExtendC",
auxType: auxInt64,
argLen: 4,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 32}, // R5
{1, 2}, // R1
{2, 4}, // R2
{0, 30}, // R1 R2 R3 R4
{1, 30}, // R1 R2 R3 R4
},
},
},
@ -48529,41 +48541,46 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 8}, // R3
{1, 16}, // R4
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
{1, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
{0, 131070}, // R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "ADD",
@ -53530,41 +53547,46 @@ var opcodeTable = [...]opInfo{
reg: regInfo{},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 64}, // X7
{1, 134217728}, // X28
{0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
{1, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 32}, // X6
{1, 64}, // X7
{0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 16}, // X5
{1, 32}, // X6
{0, 1048560}, // X5 X6 X7 X8 X9 X10 X11 X12 X13 X14 X15 X16 X17 X18 X19 X20
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "FADDS",
argLen: 2,
@ -54541,6 +54563,62 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "WFMAXDB",
argLen: 2,
asm: s390x.AWFMAXDB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMAXSB",
argLen: 2,
asm: s390x.AWFMAXSB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMINDB",
argLen: 2,
asm: s390x.AWFMINDB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "WFMINSB",
argLen: 2,
asm: s390x.AWFMINSB,
reg: regInfo{
inputs: []inputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
outputs: []outputInfo{
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
},
},
},
{
name: "FIDBR",
auxType: auxInt8,
@ -57211,41 +57289,46 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
{0, 7167}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12
{1, 7167}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
{0, 7167}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // R0
{1, 2}, // R1
{0, 7167}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R11 R12
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "FlagEQ",
argLen: 0,


@ -5,6 +5,7 @@
package ssa
import (
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
"math"
@ -2132,6 +2133,41 @@ func addRestrictions(parent *Block, ft *factsTable, t domain, v, w *Value, r rel
}
}
func unsignedAddOverflows(a, b uint64, t *types.Type) bool {
switch t.Size() {
case 8:
return a+b < a
case 4:
return a+b > math.MaxUint32
case 2:
return a+b > math.MaxUint16
case 1:
return a+b > math.MaxUint8
default:
panic("unreachable")
}
}
func signedAddOverflowsOrUnderflows(a, b int64, t *types.Type) bool {
r := a + b
switch t.Size() {
case 8:
return (a >= 0 && b >= 0 && r < 0) || (a < 0 && b < 0 && r >= 0)
case 4:
return r < math.MinInt32 || math.MaxInt32 < r
case 2:
return r < math.MinInt16 || math.MaxInt16 < r
case 1:
return r < math.MinInt8 || math.MaxInt8 < r
default:
panic("unreachable")
}
}
func unsignedSubUnderflows(a, b uint64) bool {
return a < b
}
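
A minimal sketch (editorial, not part of the change) of what these helpers report for 8-bit operands; u8 and i8 stand in for the compiler's uint8 and int8 *types.Type values:

func exampleOverflowHelpers(u8, i8 *types.Type) (a, b, c bool) {
	a = unsignedAddOverflows(200, 100, u8)           // true: 300 > math.MaxUint8
	b = signedAddOverflowsOrUnderflows(100, 100, i8) // true: 200 > math.MaxInt8
	c = unsignedSubUnderflows(3, 5)                  // true: 3 < 5, so 3-5 wraps
	return
}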
func addLocalFacts(ft *factsTable, b *Block) {
// Propagate constant ranges among values in this block.
// We do this before the second loop so that we have the
@ -2151,6 +2187,60 @@ func addLocalFacts(ft *factsTable, b *Block) {
// FIXME(go.dev/issue/68857): this loop only sets up limits properly when b.Values is in topological order.
// flowLimit can also depend on limits given by this loop, which right now is not handled.
switch v.Op {
case OpAdd64, OpAdd32, OpAdd16, OpAdd8:
x := ft.limits[v.Args[0].ID]
y := ft.limits[v.Args[1].ID]
if !unsignedAddOverflows(x.umax, y.umax, v.Type) {
r := gt
if !x.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[1], unsigned, r)
r = gt
if !y.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[0], unsigned, r)
}
if x.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
r := gt
if !x.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[1], signed, r)
}
if y.min >= 0 && !signedAddOverflowsOrUnderflows(x.max, y.max, v.Type) {
r := gt
if !y.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[0], signed, r)
}
if x.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
r := lt
if !x.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[1], signed, r)
}
if y.max <= 0 && !signedAddOverflowsOrUnderflows(x.min, y.min, v.Type) {
r := lt
if !y.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[0], signed, r)
}
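// Example (illustrative): for v = x + y on uint8 with x in [1,100] and
// y in [0,100], x.umax+y.umax = 200 fits in 8 bits, so we can record
// v > y unsigned (strict because x is nonzero) and v >= x unsigned
// (equality allowed because y may be zero).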
case OpSub64, OpSub32, OpSub16, OpSub8:
x := ft.limits[v.Args[0].ID]
y := ft.limits[v.Args[1].ID]
if !unsignedSubUnderflows(x.umin, y.umax) {
r := lt
if !y.nonzero() {
r |= eq
}
ft.update(b, v, v.Args[0], unsigned, r)
}
// FIXME: we could also do signed facts but the overflow checks are much trickier and I don't need it yet.
case OpAnd64, OpAnd32, OpAnd16, OpAnd8:
ft.update(b, v, v.Args[0], unsigned, lt|eq)
ft.update(b, v, v.Args[1], unsigned, lt|eq)
@ -2177,6 +2267,10 @@ func addLocalFacts(ft *factsTable, b *Block) {
// the mod instruction executes (and thus panics if the
// modulus is 0). See issue 67625.
ft.update(b, v, v.Args[1], unsigned, lt)
case OpStringLen:
if v.Args[0].Op == OpStringMake {
ft.update(b, v, v.Args[0].Args[1], signed, eq)
}
case OpSliceLen:
if v.Args[0].Op == OpSliceMake {
ft.update(b, v, v.Args[0].Args[1], signed, eq)


@ -1591,6 +1591,12 @@ func (s *regAllocState) regalloc(f *Func) {
mask &^= desired.avoid
}
}
if mask&s.values[v.Args[i.idx].ID].regs&(1<<s.SPReg) != 0 {
// Prefer SP register. This ensures that local variables
// use SP as their base register (instead of a copy of the
// stack pointer living in another register). See issue 74836.
mask = 1 << s.SPReg
}
args[i.idx] = s.allocValToReg(v.Args[i.idx], mask, true, v.Pos)
}
@ -1694,8 +1700,38 @@ func (s *regAllocState) regalloc(f *Func) {
}
}
}
ok:
for i := 0; i < 2; i++ {
if !(i == 0 && regspec.clobbersArg0 || i == 1 && regspec.clobbersArg1) {
continue
}
if !s.liveAfterCurrentInstruction(v.Args[i]) {
// arg is dead. We can clobber its register.
continue
}
if s.values[v.Args[i].ID].rematerializeable {
// We can rematerialize the input, don't worry about clobbering it.
continue
}
if countRegs(s.values[v.Args[i].ID].regs) >= 2 {
// We have at least 2 copies of arg. We can afford to clobber one.
continue
}
// Possible new registers to copy into.
m := s.compatRegs(v.Args[i].Type) &^ s.used
if m == 0 {
// No free registers. In this case we'll just clobber the
// input and future uses of that input must use a restore.
// TODO(khr): We should really do this like allocReg does it,
// spilling the value with the most distant next use.
continue
}
// Copy input to a new clobberable register.
c := s.allocValToReg(v.Args[i], m, true, v.Pos)
s.copies[c] = false
args[i] = c
}
// Pick a temporary register if needed.
// It should be distinct from all the input registers, so we
// allocate it after all the input registers, but before
@ -1717,6 +1753,13 @@ func (s *regAllocState) regalloc(f *Func) {
s.tmpused |= regMask(1) << tmpReg
}
if regspec.clobbersArg0 {
s.freeReg(register(s.f.getHome(args[0].ID).(*Register).num))
}
if regspec.clobbersArg1 {
s.freeReg(register(s.f.getHome(args[1].ID).(*Register).num))
}
// Now that all args are in regs, we're ready to issue the value itself.
// Before we pick a register for the output value, allow input registers
// to be deallocated. We do this here so that the output can use the
@ -2743,7 +2786,7 @@ func (s *regAllocState) computeLive() {
// out to all of them.
po := f.postorder()
s.loopnest = f.loopnest()
s.loopnest.calculateDepths()
s.loopnest.computeUnavoidableCalls()
for {
changed := false
@ -3050,3 +3093,72 @@ func (d *desiredState) merge(x *desiredState) {
d.addList(e.ID, e.regs)
}
}
// computeUnavoidableCalls computes the containsUnavoidableCall fields in the loop nest.
func (loopnest *loopnest) computeUnavoidableCalls() {
f := loopnest.f
hasCall := f.Cache.allocBoolSlice(f.NumBlocks())
defer f.Cache.freeBoolSlice(hasCall)
for _, b := range f.Blocks {
if b.containsCall() {
hasCall[b.ID] = true
}
}
found := f.Cache.allocSparseSet(f.NumBlocks())
defer f.Cache.freeSparseSet(found)
// Run a DFS to find a path through the loop that avoids all calls.
// Such a path either escapes the loop or returns to the header.
// It isn't enough for an exit to merely not be dominated by any call; for example:
//	... some loop
//	call1   call2
//	    \   /
//	    block
//	     ...
// block is not dominated by any single call, but there is no call-free path to it.
loopLoop:
for _, l := range loopnest.loops {
found.clear()
tovisit := make([]*Block, 0, 8)
tovisit = append(tovisit, l.header)
for len(tovisit) > 0 {
cur := tovisit[len(tovisit)-1]
tovisit = tovisit[:len(tovisit)-1]
if hasCall[cur.ID] {
continue
}
for _, s := range cur.Succs {
nb := s.Block()
if nb == l.header {
// Found a call-free path around the loop.
continue loopLoop
}
if found.contains(nb.ID) {
// Already found via another path.
continue
}
nl := loopnest.b2l[nb.ID]
if nl == nil || (nl.depth <= l.depth && nl != l) {
// Left the loop.
continue
}
tovisit = append(tovisit, nb)
found.add(nb.ID)
}
}
// No call-free path was found.
l.containsUnavoidableCall = true
}
}
func (b *Block) containsCall() bool {
if b.Kind == BlockDefer {
return true
}
for _, v := range b.Values {
if opcodeTable[v.Op].call {
return true
}
}
return false
}


@ -6,6 +6,7 @@ package ssa
import (
"cmd/compile/internal/types"
"fmt"
"testing"
)
@ -218,10 +219,37 @@ func TestSpillMove2(t *testing.T) {
}
func TestClobbersArg0(t *testing.T) {
c := testConfig(t)
f := c.Fun("entry",
Bloc("entry",
Valu("mem", OpInitMem, types.TypeMem, 0, nil),
Valu("ptr", OpArg, c.config.Types.Int64.PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo())),
Valu("dst", OpArg, c.config.Types.Int64.PtrTo().PtrTo(), 0, c.Temp(c.config.Types.Int64.PtrTo().PtrTo())),
Valu("zero", OpAMD64LoweredZeroLoop, types.TypeMem, 256, nil, "ptr", "mem"),
Valu("store", OpAMD64MOVQstore, types.TypeMem, 0, nil, "dst", "ptr", "zero"),
Exit("store")))
flagalloc(f.f)
regalloc(f.f)
checkFunc(f.f)
// LoweredZeroLoop clobbers its argument, so there must be a copy of "ptr" somewhere
// so we still have that value available at "store".
if n := numCopies(f.blocks["entry"]); n != 1 {
fmt.Printf("%s\n", f.f.String())
t.Errorf("got %d copies, want 1", n)
}
}
func numSpills(b *Block) int {
return numOps(b, OpStoreReg)
}
func numCopies(b *Block) int {
return numOps(b, OpCopy)
}
func numOps(b *Block, op Op) int {
n := 0
for _, v := range b.Values {
if v.Op == OpStoreReg {
if v.Op == op {
n++
}
}


@ -29,6 +29,8 @@ type deadValueChoice bool
const (
leaveDeadValues deadValueChoice = false
removeDeadValues = true
repZeroThreshold = 1408 // size beyond which we use REP STOS for zeroing
)
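// Editorial summary (hedged) of the amd64 zeroing tiers these thresholds
// select, as encoded by the Zero rewrite rules in this change:
//	s < 16:            size-specific stores
//	16 <= s < 192:     LoweredZero (straight-line stores)
//	192 <= s <= 1408:  LoweredZeroLoop (a generated loop)
//	s > 1408:          REPSTOSQ (with a remainder store first when s%8 != 0)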
// deadcode indicates whether rewrite should try to remove any values that become dead.
@ -199,16 +201,18 @@ func applyRewrite(f *Func, rb blockRewriter, rv valueRewriter, deadcode deadValu
f.freeValue(v)
continue
}
if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) && pendingLines.get(vl) == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
if v.Pos.IsStmt() != src.PosNotStmt && !notStmtBoundary(v.Op) {
if pl, ok := pendingLines.get(vl); ok && pl == int32(b.ID) {
pendingLines.remove(vl)
v.Pos = v.Pos.WithIsStmt()
}
}
if i != j {
b.Values[j] = v
}
j++
}
if pendingLines.get(b.Pos) == int32(b.ID) {
if pl, ok := pendingLines.get(b.Pos); ok && pl == int32(b.ID) {
b.Pos = b.Pos.WithIsStmt()
pendingLines.remove(b.Pos)
}
@ -301,7 +305,6 @@ func canMergeLoadClobber(target, load, x *Value) bool {
return false
}
loopnest := x.Block.Func.loopnest()
loopnest.calculateDepths()
if loopnest.depth(target.Block.ID) > loopnest.depth(x.Block.ID) {
return false
}
@ -479,30 +482,28 @@ func nto(x int64) int64 {
// logX returns the logarithm of n in base 2.
// n must be a positive power of 2 (isPowerOfTwoX returns true).
func log8(n int8) int64 {
return int64(bits.Len8(uint8(n))) - 1
}
func log16(n int16) int64 {
return int64(bits.Len16(uint16(n))) - 1
}
func log32(n int32) int64 {
return int64(bits.Len32(uint32(n))) - 1
}
func log64(n int64) int64 {
return int64(bits.Len64(uint64(n))) - 1
}
func log8(n int8) int64 { return log8u(uint8(n)) }
func log16(n int16) int64 { return log16u(uint16(n)) }
func log32(n int32) int64 { return log32u(uint32(n)) }
func log64(n int64) int64 { return log64u(uint64(n)) }
// log2uint32 returns logarithm in base 2 of uint32(n), with log2(0) = -1.
// Rounds down.
func log2uint32(n int64) int64 {
return int64(bits.Len32(uint32(n))) - 1
}
// logXu returns the logarithm of n in base 2.
// n must be a power of 2 (isUnsignedPowerOfTwo returns true).
func log8u(n uint8) int64 { return int64(bits.Len8(n)) - 1 }
func log16u(n uint16) int64 { return int64(bits.Len16(n)) - 1 }
func log32u(n uint32) int64 { return int64(bits.Len32(n)) - 1 }
func log64u(n uint64) int64 { return int64(bits.Len64(n)) - 1 }
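// For example: log8(8) == 3, log32u(1<<31) == 31, and log64u(1) == 0.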
// isPowerOfTwoX functions report whether n is a power of 2.
func isPowerOfTwo[T int8 | int16 | int32 | int64](n T) bool {
return n > 0 && n&(n-1) == 0
}
// isUnsignedPowerOfTwo reports whether n is an unsigned power of 2.
func isUnsignedPowerOfTwo[T uint8 | uint16 | uint32 | uint64](n T) bool {
return n != 0 && n&(n-1) == 0
}
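// Note the signed/unsigned split: isPowerOfTwo(int8(-128)) is false (negative
// values are rejected), while isUnsignedPowerOfTwo(uint8(128)) is true.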
// isUint64PowerOfTwo reports whether uint64(n) is a power of 2.
func isUint64PowerOfTwo(in int64) bool {
n := uint64(in)
@ -2671,3 +2672,32 @@ func flagify(v *Value) bool {
v.AddArg(inner)
return true
}
// PanicBoundsC contains a constant for a bounds failure.
type PanicBoundsC struct {
C int64
}
// PanicBoundsCC contains 2 constants for a bounds failure.
type PanicBoundsCC struct {
Cx int64
Cy int64
}
func (p PanicBoundsC) CanBeAnSSAAux() {
}
func (p PanicBoundsCC) CanBeAnSSAAux() {
}
func auxToPanicBoundsC(i Aux) PanicBoundsC {
return i.(PanicBoundsC)
}
func auxToPanicBoundsCC(i Aux) PanicBoundsCC {
return i.(PanicBoundsCC)
}
func panicBoundsCToAux(p PanicBoundsC) Aux {
return p
}
func panicBoundsCCToAux(p PanicBoundsCC) Aux {
return p
}
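
For orientation, a hedged sketch of how these aux types thread through the constant-folding rewrites (amd64 op names shown; the other ports mirror the pattern):

// PanicBounds [kind] x y mem      // generic op, lowered to LoweredPanicBoundsRR
//  -> LoweredPanicBoundsRC [kind] x {PanicBoundsC{C: cy}} mem          // y is MOVQconst [cy]
//  -> LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx: cx, Cy: cy}} mem  // x is MOVQconst [cx] too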


@ -75,6 +75,14 @@ func rewriteValue386(v *Value) bool {
return rewriteValue386_Op386LEAL4(v)
case Op386LEAL8:
return rewriteValue386_Op386LEAL8(v)
case Op386LoweredPanicBoundsRC:
return rewriteValue386_Op386LoweredPanicBoundsRC(v)
case Op386LoweredPanicBoundsRR:
return rewriteValue386_Op386LoweredPanicBoundsRR(v)
case Op386LoweredPanicExtendRC:
return rewriteValue386_Op386LoweredPanicExtendRC(v)
case Op386LoweredPanicExtendRR:
return rewriteValue386_Op386LoweredPanicExtendRR(v)
case Op386MOVBLSX:
return rewriteValue386_Op386MOVBLSX(v)
case Op386MOVBLSXload:
@ -558,9 +566,11 @@ func rewriteValue386(v *Value) bool {
v.Op = Op386ORL
return true
case OpPanicBounds:
return rewriteValue386_OpPanicBounds(v)
v.Op = Op386LoweredPanicBoundsRR
return true
case OpPanicExtend:
return rewriteValue386_OpPanicExtend(v)
v.Op = Op386LoweredPanicExtendRR
return true
case OpRotateLeft16:
v.Op = Op386ROLW
return true
@ -3398,6 +3408,135 @@ func rewriteValue386_Op386LEAL8(v *Value) bool {
}
return false
}
func rewriteValue386_Op386LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVLconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != Op386MOVLconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
mem := v_1
v.reset(Op386LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(c), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValue386_Op386LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVLconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != Op386MOVLconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(Op386LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVLconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != Op386MOVLconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(Op386LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValue386_Op386LoweredPanicExtendRC(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRC [kind] {p} (MOVLconst [hi]) (MOVLconst [lo]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != Op386MOVLconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != Op386MOVLconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(Op386LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(hi)<<32 + int64(uint32(lo)), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValue386_Op386LoweredPanicExtendRR(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRR [kind] hi lo (MOVLconst [c]) mem)
// result: (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
if v_2.Op != Op386MOVLconst {
break
}
c := auxIntToInt32(v_2.AuxInt)
mem := v_3
v.reset(Op386LoweredPanicExtendRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg3(hi, lo, mem)
return true
}
// match: (LoweredPanicExtendRR [kind] (MOVLconst [hi]) (MOVLconst [lo]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != Op386MOVLconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != Op386MOVLconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
y := v_2
mem := v_3
v.reset(Op386LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(hi)<<32 + int64(uint32(lo))})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValue386_Op386MOVBLSX(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@ -9313,118 +9452,6 @@ func rewriteValue386_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValue386_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(Op386LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(Op386LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(Op386LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValue386_OpPanicExtend(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicExtendA [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 0) {
break
}
v.reset(Op386LoweredPanicExtendA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicExtendB [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 1) {
break
}
v.reset(Op386LoweredPanicExtendB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicExtendC [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 2) {
break
}
v.reset(Op386LoweredPanicExtendC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
return false
}
func rewriteValue386_OpRsh16Ux16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]


@ -215,6 +215,12 @@ func rewriteValueAMD64(v *Value) bool {
return rewriteValueAMD64_OpAMD64LEAQ4(v)
case OpAMD64LEAQ8:
return rewriteValueAMD64_OpAMD64LEAQ8(v)
case OpAMD64LoweredPanicBoundsCR:
return rewriteValueAMD64_OpAMD64LoweredPanicBoundsCR(v)
case OpAMD64LoweredPanicBoundsRC:
return rewriteValueAMD64_OpAMD64LoweredPanicBoundsRC(v)
case OpAMD64LoweredPanicBoundsRR:
return rewriteValueAMD64_OpAMD64LoweredPanicBoundsRR(v)
case OpAMD64MOVBELstore:
return rewriteValueAMD64_OpAMD64MOVBELstore(v)
case OpAMD64MOVBEQstore:
@ -3431,7 +3437,8 @@ func rewriteValueAMD64(v *Value) bool {
case OpPairDotProdMaskedInt16x8:
return rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v)
case OpPanicBounds:
return rewriteValueAMD64_OpPanicBounds(v)
v.Op = OpAMD64LoweredPanicBoundsRR
return true
case OpPermute2Float32x16:
v.Op = OpAMD64VPERMI2PS512
return true
@ -14127,6 +14134,86 @@ func rewriteValueAMD64_OpAMD64LEAQ8(v *Value) bool {
}
return false
}
func rewriteValueAMD64_OpAMD64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVQconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpAMD64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVQconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpAMD64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVQconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpAMD64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVQconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpAMD64MOVQconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpAMD64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueAMD64_OpAMD64MOVBELstore(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@ -45567,60 +45654,6 @@ func rewriteValueAMD64_OpPairDotProdMaskedInt16x8(v *Value) bool {
return true
}
}
func rewriteValueAMD64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpAMD64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpAMD64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpAMD64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueAMD64_OpPermute2MaskedFloat32x16(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
@ -55499,19 +55532,49 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
return true
}
// match: (Zero [s] destptr mem)
// cond: s%16 != 0 && s > 16
// result: (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
// cond: s >= 16 && s < 192
// result: (LoweredZero [s] destptr mem)
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s%16 != 0 && s > 16) {
if !(s >= 16 && s < 192) {
break
}
v.reset(OpAMD64LoweredZero)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(destptr, mem)
return true
}
// match: (Zero [s] destptr mem)
// cond: s >= 192 && s <= repZeroThreshold
// result: (LoweredZeroLoop [s] destptr mem)
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s >= 192 && s <= repZeroThreshold) {
break
}
v.reset(OpAMD64LoweredZeroLoop)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(destptr, mem)
return true
}
// match: (Zero [s] destptr mem)
// cond: s > repZeroThreshold && s%8 != 0
// result: (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8]) (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s > repZeroThreshold && s%8 != 0) {
break
}
v.reset(OpZero)
v.AuxInt = int64ToAuxInt(s - s%16)
v.AuxInt = int64ToAuxInt(s - s%8)
v0 := b.NewValue0(v.Pos, OpOffPtr, destptr.Type)
v0.AuxInt = int64ToAuxInt(s % 16)
v0.AuxInt = int64ToAuxInt(s % 8)
v0.AddArg(destptr)
v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
@ -55519,99 +55582,14 @@ func rewriteValueAMD64_OpZero(v *Value) bool {
v.AddArg2(v0, v1)
return true
}
// match: (Zero [16] destptr mem)
// result: (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)
for {
if auxIntToInt64(v.AuxInt) != 16 {
break
}
destptr := v_0
mem := v_1
v.reset(OpAMD64MOVOstoreconst)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
v.AddArg2(destptr, mem)
return true
}
// match: (Zero [32] destptr mem)
// result: (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))
for {
if auxIntToInt64(v.AuxInt) != 32 {
break
}
destptr := v_0
mem := v_1
v.reset(OpAMD64MOVOstoreconst)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
v0.AddArg2(destptr, mem)
v.AddArg2(destptr, v0)
return true
}
// match: (Zero [48] destptr mem)
// result: (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem)))
for {
if auxIntToInt64(v.AuxInt) != 48 {
break
}
destptr := v_0
mem := v_1
v.reset(OpAMD64MOVOstoreconst)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 32))
v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
v1.AddArg2(destptr, mem)
v0.AddArg2(destptr, v1)
v.AddArg2(destptr, v0)
return true
}
// match: (Zero [64] destptr mem)
// result: (MOVOstoreconst [makeValAndOff(0,48)] destptr (MOVOstoreconst [makeValAndOff(0,32)] destptr (MOVOstoreconst [makeValAndOff(0,16)] destptr (MOVOstoreconst [makeValAndOff(0,0)] destptr mem))))
for {
if auxIntToInt64(v.AuxInt) != 64 {
break
}
destptr := v_0
mem := v_1
v.reset(OpAMD64MOVOstoreconst)
v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 48))
v0 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v0.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 32))
v1 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v1.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 16))
v2 := b.NewValue0(v.Pos, OpAMD64MOVOstoreconst, types.TypeMem)
v2.AuxInt = valAndOffToAuxInt(makeValAndOff(0, 0))
v2.AddArg2(destptr, mem)
v1.AddArg2(destptr, v2)
v0.AddArg2(destptr, v1)
v.AddArg2(destptr, v0)
return true
}
// match: (Zero [s] destptr mem)
// cond: s > 64 && s <= 1024 && s%16 == 0
// result: (DUFFZERO [s] destptr mem)
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s > 64 && s <= 1024 && s%16 == 0) {
break
}
v.reset(OpAMD64DUFFZERO)
v.AuxInt = int64ToAuxInt(s)
v.AddArg2(destptr, mem)
return true
}
// match: (Zero [s] destptr mem)
// cond: s > 1024 && s%8 == 0
// cond: s > repZeroThreshold && s%8 == 0
// result: (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)
for {
s := auxIntToInt64(v.AuxInt)
destptr := v_0
mem := v_1
if !(s > 1024 && s%8 == 0) {
if !(s > repZeroThreshold && s%8 == 0) {
break
}
v.reset(OpAMD64REPSTOSQ)


@@ -151,6 +151,14 @@ func rewriteValueARM(v *Value) bool {
return rewriteValueARM_OpARMLessThan(v)
case OpARMLessThanU:
return rewriteValueARM_OpARMLessThanU(v)
case OpARMLoweredPanicBoundsRC:
return rewriteValueARM_OpARMLoweredPanicBoundsRC(v)
case OpARMLoweredPanicBoundsRR:
return rewriteValueARM_OpARMLoweredPanicBoundsRR(v)
case OpARMLoweredPanicExtendRC:
return rewriteValueARM_OpARMLoweredPanicExtendRC(v)
case OpARMLoweredPanicExtendRR:
return rewriteValueARM_OpARMLoweredPanicExtendRR(v)
case OpARMMOVBUload:
return rewriteValueARM_OpARMMOVBUload(v)
case OpARMMOVBUloadidx:
@@ -745,9 +753,11 @@ func rewriteValueARM(v *Value) bool {
v.Op = OpARMOR
return true
case OpPanicBounds:
return rewriteValueARM_OpPanicBounds(v)
v.Op = OpARMLoweredPanicBoundsRR
return true
case OpPanicExtend:
return rewriteValueARM_OpPanicExtend(v)
v.Op = OpARMLoweredPanicExtendRR
return true
case OpRotateLeft16:
return rewriteValueARM_OpRotateLeft16(v)
case OpRotateLeft32:
@@ -4548,6 +4558,135 @@ func rewriteValueARM_OpARMLessThanU(v *Value) bool {
}
return false
}
func rewriteValueARM_OpARMLoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVWconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpARMMOVWconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
mem := v_1
v.reset(OpARMLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(c), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM_OpARMLoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVWconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpARMMOVWconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(OpARMLoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVWconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpARMMOVWconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpARMLoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueARM_OpARMLoweredPanicExtendRC(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRC [kind] {p} (MOVWconst [hi]) (MOVWconst [lo]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpARMMOVWconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != OpARMMOVWconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(OpARMLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(hi)<<32 + int64(uint32(lo)), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM_OpARMLoweredPanicExtendRR(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRR [kind] hi lo (MOVWconst [c]) mem)
// result: (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
if v_2.Op != OpARMMOVWconst {
break
}
c := auxIntToInt32(v_2.AuxInt)
mem := v_3
v.reset(OpARMLoweredPanicExtendRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg3(hi, lo, mem)
return true
}
// match: (LoweredPanicExtendRR [kind] (MOVWconst [hi]) (MOVWconst [lo]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpARMMOVWconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != OpARMMOVWconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
y := v_2
mem := v_3
v.reset(OpARMLoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(hi)<<32 + int64(uint32(lo))})
v.AddArg2(y, mem)
return true
}
return false
}
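// PanicBounds/PanicExtend now lower directly to the RR ("both operands in
// registers") forms; the rules above then fold constant operands into the
// aux value, stepping RR -> RC/CR -> CC as the constants become known.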
func rewriteValueARM_OpARMMOVBUload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -14969,118 +15108,6 @@ func rewriteValueARM_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueARM_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpARMLoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpARMLoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpARMLoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueARM_OpPanicExtend(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicExtendA [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpARMLoweredPanicExtendA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicExtendB [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpARMLoweredPanicExtendB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicExtendC [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpARMLoweredPanicExtendC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
return false
}
func rewriteValueARM_OpRotateLeft16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]


@@ -180,6 +180,12 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64LessThanNoov(v)
case OpARM64LessThanU:
return rewriteValueARM64_OpARM64LessThanU(v)
case OpARM64LoweredPanicBoundsCR:
return rewriteValueARM64_OpARM64LoweredPanicBoundsCR(v)
case OpARM64LoweredPanicBoundsRC:
return rewriteValueARM64_OpARM64LoweredPanicBoundsRC(v)
case OpARM64LoweredPanicBoundsRR:
return rewriteValueARM64_OpARM64LoweredPanicBoundsRR(v)
case OpARM64MADD:
return rewriteValueARM64_OpARM64MADD(v)
case OpARM64MADDW:
@@ -936,7 +942,8 @@ func rewriteValueARM64(v *Value) bool {
v.Op = OpARM64OR
return true
case OpPanicBounds:
return rewriteValueARM64_OpPanicBounds(v)
v.Op = OpARM64LoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValueARM64_OpPopCount16(v)
case OpPopCount32:
@@ -1592,6 +1599,66 @@ func rewriteValueARM64_OpARM64ADD(v *Value) bool {
}
break
}
// match: (ADD x0 x1:(ANDshiftRA x2:(SLLconst [sl] y) z [63]))
// cond: x1.Uses == 1 && x2.Uses == 1
// result: (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
x1 := v_1
if x1.Op != OpARM64ANDshiftRA || auxIntToInt64(x1.AuxInt) != 63 {
continue
}
z := x1.Args[1]
x2 := x1.Args[0]
if x2.Op != OpARM64SLLconst {
continue
}
sl := auxIntToInt64(x2.AuxInt)
y := x2.Args[0]
if !(x1.Uses == 1 && x2.Uses == 1) {
continue
}
v.reset(OpARM64ADDshiftLL)
v.AuxInt = int64ToAuxInt(sl)
v0 := b.NewValue0(v.Pos, OpARM64ANDshiftRA, y.Type)
v0.AuxInt = int64ToAuxInt(63)
v0.AddArg2(y, z)
v.AddArg2(x0, v0)
return true
}
break
}
// match: (ADD x0 x1:(ANDshiftLL x2:(SRAconst [63] z) y [sl]))
// cond: x1.Uses == 1 && x2.Uses == 1
// result: (ADDshiftLL x0 (ANDshiftRA <y.Type> y z [63]) [sl])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x0 := v_0
x1 := v_1
if x1.Op != OpARM64ANDshiftLL {
continue
}
sl := auxIntToInt64(x1.AuxInt)
y := x1.Args[1]
x2 := x1.Args[0]
if x2.Op != OpARM64SRAconst || auxIntToInt64(x2.AuxInt) != 63 {
continue
}
z := x2.Args[0]
if !(x1.Uses == 1 && x2.Uses == 1) {
continue
}
v.reset(OpARM64ADDshiftLL)
v.AuxInt = int64ToAuxInt(sl)
v0 := b.NewValue0(v.Pos, OpARM64ANDshiftRA, y.Type)
v0.AuxInt = int64ToAuxInt(63)
v0.AddArg2(y, z)
v.AddArg2(x0, v0)
return true
}
break
}
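// The two patterns above are the same expression with the shift attached
// to different AND operands: x0 + ((y << sl) & (z >> 63)). Because z>>63
// is either all zeros or all ones, the mask commutes with the shift, so
// the shift can be hoisted out of the AND and folded into the add as
// ADDshiftLL.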
return false
}
func rewriteValueARM64_OpARM64ADDSflags(v *Value) bool {
@@ -6982,6 +7049,86 @@ func rewriteValueARM64_OpARM64LessThanU(v *Value) bool {
}
return false
}
func rewriteValueARM64_OpARM64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpARM64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpARM64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpARM64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpARM64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64MADD(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@@ -19790,60 +19937,6 @@ func rewriteValueARM64_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueARM64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpARM64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpARM64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpARM64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueARM64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@@ -25045,6 +25138,37 @@ func rewriteBlockARM64(b *Block) bool {
b.resetWithControl(BlockARM64FGE, cc)
return true
}
// match: (NZ sub:(SUB x y))
// cond: sub.Uses == 1
// result: (NE (CMP x y))
for b.Controls[0].Op == OpARM64SUB {
sub := b.Controls[0]
y := sub.Args[1]
x := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
v0.AddArg2(x, y)
b.resetWithControl(BlockARM64NE, v0)
return true
}
// match: (NZ sub:(SUBconst [c] y))
// cond: sub.Uses == 1
// result: (NE (CMPconst [c] y))
for b.Controls[0].Op == OpARM64SUBconst {
sub := b.Controls[0]
c := auxIntToInt64(sub.AuxInt)
y := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
v0.AuxInt = int64ToAuxInt(c)
v0.AddArg(y)
b.resetWithControl(BlockARM64NE, v0)
return true
}
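// A zero test of a subtraction does not need the difference itself: when
// the SUB/SUBconst has no other uses it becomes a flag-setting compare,
// turning the NZ/Z (and NZW/ZW) blocks into NE/EQ on the CMP flags.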
// match: (NZ (ANDconst [c] x) yes no)
// cond: oneBit(c)
// result: (TBNZ [int64(ntz64(c))] x yes no)
@@ -25083,6 +25207,37 @@ func rewriteBlockARM64(b *Block) bool {
return true
}
case BlockARM64NZW:
// match: (NZW sub:(SUB x y))
// cond: sub.Uses == 1
// result: (NE (CMPW x y))
for b.Controls[0].Op == OpARM64SUB {
sub := b.Controls[0]
y := sub.Args[1]
x := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
v0.AddArg2(x, y)
b.resetWithControl(BlockARM64NE, v0)
return true
}
// match: (NZW sub:(SUBconst [c] y))
// cond: sub.Uses == 1
// result: (NE (CMPWconst [int32(c)] y))
for b.Controls[0].Op == OpARM64SUBconst {
sub := b.Controls[0]
c := auxIntToInt64(sub.AuxInt)
y := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
v0.AuxInt = int32ToAuxInt(int32(c))
v0.AddArg(y)
b.resetWithControl(BlockARM64NE, v0)
return true
}
// match: (NZW (ANDconst [c] x) yes no)
// cond: oneBit(int64(uint32(c)))
// result: (TBNZ [int64(ntz64(int64(uint32(c))))] x yes no)
@@ -25312,6 +25467,34 @@ func rewriteBlockARM64(b *Block) bool {
return true
}
case BlockARM64UGT:
// match: (UGT (CMPconst [0] x))
// result: (NE (CMPconst [0] x))
for b.Controls[0].Op == OpARM64CMPconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
v0.AuxInt = int64ToAuxInt(0)
v0.AddArg(x)
b.resetWithControl(BlockARM64NE, v0)
return true
}
// match: (UGT (CMPWconst [0] x))
// result: (NE (CMPWconst [0] x))
for b.Controls[0].Op == OpARM64CMPWconst {
v_0 := b.Controls[0]
if auxIntToInt32(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
v0.AuxInt = int32ToAuxInt(0)
v0.AddArg(x)
b.resetWithControl(BlockARM64NE, v0)
return true
}
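// For unsigned values, x > 0 is exactly x != 0 (and x <= 0 is exactly
// x == 0), so UGT/ULE over a compare with zero reduce to NE/EQ; the
// matching ULE rules appear below.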
// match: (UGT (FlagConstant [fc]) yes no)
// cond: fc.ugt()
// result: (First yes no)
@@ -25346,6 +25529,34 @@ func rewriteBlockARM64(b *Block) bool {
return true
}
case BlockARM64ULE:
// match: (ULE (CMPconst [0] x))
// result: (EQ (CMPconst [0] x))
for b.Controls[0].Op == OpARM64CMPconst {
v_0 := b.Controls[0]
if auxIntToInt64(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v0 := b.NewValue0(v_0.Pos, OpARM64CMPconst, types.TypeFlags)
v0.AuxInt = int64ToAuxInt(0)
v0.AddArg(x)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (ULE (CMPWconst [0] x))
// result: (EQ (CMPWconst [0] x))
for b.Controls[0].Op == OpARM64CMPWconst {
v_0 := b.Controls[0]
if auxIntToInt32(v_0.AuxInt) != 0 {
break
}
x := v_0.Args[0]
v0 := b.NewValue0(v_0.Pos, OpARM64CMPWconst, types.TypeFlags)
v0.AuxInt = int32ToAuxInt(0)
v0.AddArg(x)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (ULE (FlagConstant [fc]) yes no)
// cond: fc.ule()
// result: (First yes no)
@@ -25414,6 +25625,37 @@ func rewriteBlockARM64(b *Block) bool {
return true
}
case BlockARM64Z:
// match: (Z sub:(SUB x y))
// cond: sub.Uses == 1
// result: (EQ (CMP x y))
for b.Controls[0].Op == OpARM64SUB {
sub := b.Controls[0]
y := sub.Args[1]
x := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMP, types.TypeFlags)
v0.AddArg2(x, y)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (Z sub:(SUBconst [c] y))
// cond: sub.Uses == 1
// result: (EQ (CMPconst [c] y))
for b.Controls[0].Op == OpARM64SUBconst {
sub := b.Controls[0]
c := auxIntToInt64(sub.AuxInt)
y := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPconst, types.TypeFlags)
v0.AuxInt = int64ToAuxInt(c)
v0.AddArg(y)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (Z (ANDconst [c] x) yes no)
// cond: oneBit(c)
// result: (TBZ [int64(ntz64(c))] x yes no)
@@ -25452,6 +25694,37 @@ func rewriteBlockARM64(b *Block) bool {
return true
}
case BlockARM64ZW:
// match: (ZW sub:(SUB x y))
// cond: sub.Uses == 1
// result: (EQ (CMPW x y))
for b.Controls[0].Op == OpARM64SUB {
sub := b.Controls[0]
y := sub.Args[1]
x := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPW, types.TypeFlags)
v0.AddArg2(x, y)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (ZW sub:(SUBconst [c] y))
// cond: sub.Uses == 1
// result: (EQ (CMPWconst [int32(c)] y))
for b.Controls[0].Op == OpARM64SUBconst {
sub := b.Controls[0]
c := auxIntToInt64(sub.AuxInt)
y := sub.Args[0]
if !(sub.Uses == 1) {
break
}
v0 := b.NewValue0(sub.Pos, OpARM64CMPWconst, types.TypeFlags)
v0.AuxInt = int32ToAuxInt(int32(c))
v0.AddArg(y)
b.resetWithControl(BlockARM64EQ, v0)
return true
}
// match: (ZW (ANDconst [c] x) yes no)
// cond: oneBit(int64(uint32(c)))
// result: (TBZ [int64(ntz64(int64(uint32(c))))] x yes no)


@@ -330,6 +330,12 @@ func rewriteValueLOONG64(v *Value) bool {
return rewriteValueLOONG64_OpLOONG64DIVV(v)
case OpLOONG64DIVVU:
return rewriteValueLOONG64_OpLOONG64DIVVU(v)
case OpLOONG64LoweredPanicBoundsCR:
return rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsCR(v)
case OpLOONG64LoweredPanicBoundsRC:
return rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsRC(v)
case OpLOONG64LoweredPanicBoundsRR:
return rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsRR(v)
case OpLOONG64MASKEQZ:
return rewriteValueLOONG64_OpLOONG64MASKEQZ(v)
case OpLOONG64MASKNEZ:
@@ -669,7 +675,8 @@ func rewriteValueLOONG64(v *Value) bool {
v.Op = OpLOONG64OR
return true
case OpPanicBounds:
return rewriteValueLOONG64_OpPanicBounds(v)
v.Op = OpLOONG64LoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValueLOONG64_OpPopCount16(v)
case OpPopCount32:
@@ -2070,6 +2077,86 @@ func rewriteValueLOONG64_OpLOONG64DIVVU(v *Value) bool {
}
return false
}
func rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpLOONG64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpLOONG64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpLOONG64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVVconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpLOONG64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpLOONG64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MASKEQZ(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -2378,6 +2465,21 @@ func rewriteValueLOONG64_OpLOONG64MOVBUreg(v *Value) bool {
v.AddArg(x)
return true
}
// match: (MOVBUreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(uint8(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(uint8(c)) == c) {
break
}
v.copyOf(x)
return true
}
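// An ANDconst whose constant already fits the target width (with the
// right sign) leaves the value fully extended, so the MOV*reg extension
// is dropped and the AND result is used as-is; the same rule is repeated
// for each of the 8-, 16- and 32-bit register variants below.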
return false
}
func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool {
@@ -2526,6 +2628,21 @@ func rewriteValueLOONG64_OpLOONG64MOVBreg(v *Value) bool {
v.AuxInt = int64ToAuxInt(int64(int8(c)))
return true
}
// match: (MOVBreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(int8(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(int8(c)) == c) {
break
}
v.copyOf(x)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MOVBstore(v *Value) bool {
@@ -3614,6 +3731,21 @@ func rewriteValueLOONG64_OpLOONG64MOVHUreg(v *Value) bool {
v.AuxInt = int64ToAuxInt(int64(uint16(c)))
return true
}
// match: (MOVHUreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(uint16(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(uint16(c)) == c) {
break
}
v.copyOf(x)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool {
@@ -3806,6 +3938,21 @@ func rewriteValueLOONG64_OpLOONG64MOVHreg(v *Value) bool {
v.AuxInt = int64ToAuxInt(int64(int16(c)))
return true
}
// match: (MOVHreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(int16(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(int16(c)) == c) {
break
}
v.copyOf(x)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MOVHstore(v *Value) bool {
@@ -4821,6 +4968,21 @@ func rewriteValueLOONG64_OpLOONG64MOVWUreg(v *Value) bool {
v.AuxInt = int64ToAuxInt(int64(uint32(c)))
return true
}
// match: (MOVWUreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(uint32(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(uint32(c)) == c) {
break
}
v.copyOf(x)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool {
@@ -5046,6 +5208,21 @@ func rewriteValueLOONG64_OpLOONG64MOVWreg(v *Value) bool {
v.AuxInt = int64ToAuxInt(int64(int32(c)))
return true
}
// match: (MOVWreg x:(ANDconst [c] y))
// cond: c >= 0 && int64(int32(c)) == c
// result: x
for {
x := v_0
if x.Op != OpLOONG64ANDconst {
break
}
c := auxIntToInt64(x.AuxInt)
if !(c >= 0 && int64(int32(c)) == c) {
break
}
v.copyOf(x)
return true
}
return false
}
func rewriteValueLOONG64_OpLOONG64MOVWstore(v *Value) bool {
@@ -5360,20 +5537,8 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezeroidx(v *Value) bool {
func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (MULV x (MOVVconst [-1]))
// result: (NEGV x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != -1 {
continue
}
v.reset(OpLOONG64NEGV)
v.AddArg(x)
return true
}
break
}
b := v.Block
config := b.Func.Config
// match: (MULV _ (MOVVconst [0]))
// result: (MOVVconst [0])
for {
@@ -5401,8 +5566,8 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
break
}
// match: (MULV x (MOVVconst [c]))
// cond: isPowerOfTwo(c)
// result: (SLLVconst [log64(c)] x)
// cond: canMulStrengthReduce(config, c)
// result: {mulStrengthReduce(v, x, c)}
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
@@ -5410,12 +5575,10 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
continue
}
c := auxIntToInt64(v_1.AuxInt)
if !(isPowerOfTwo(c)) {
if !(canMulStrengthReduce(config, c)) {
continue
}
v.reset(OpLOONG64SLLVconst)
v.AuxInt = int64ToAuxInt(log64(c))
v.AddArg(x)
v.copyOf(mulStrengthReduce(v, x, c))
return true
}
break
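// Constant multiplies now go through the generic canMulStrengthReduce/
// mulStrengthReduce helpers, which pick shift/add/sub sequences where
// profitable; the dedicated x*(-1) -> NEGV and power-of-two -> SLLVconst
// rules are dropped in favor of them.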
@@ -9016,60 +9179,6 @@ func rewriteValueLOONG64_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueLOONG64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpLOONG64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpLOONG64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpLOONG64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueLOONG64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block


@@ -0,0 +1,29 @@
// Code generated from _gen/LOONG64latelower.rules using 'go generate'; DO NOT EDIT.

package ssa

func rewriteValueLOONG64latelower(v *Value) bool {
switch v.Op {
case OpLOONG64SLLVconst:
return rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v)
}
return false
}
func rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SLLVconst [1] x)
// result: (ADDV x x)
for {
if auxIntToInt64(v.AuxInt) != 1 {
break
}
x := v_0
v.reset(OpLOONG64ADDV)
v.AddArg2(x, x)
return true
}
return false
}
func rewriteBlockLOONG64latelower(b *Block) bool {
return false
}
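// This late-lowering pass runs after the main rewrite rules, so earlier
// passes can still match on SLLVconst while the final code gets ADDV x x
// for a shift left by one, which is typically at least as cheap.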


@@ -279,6 +279,14 @@ func rewriteValueMIPS(v *Value) bool {
return rewriteValueMIPS_OpMIPSLoweredAtomicAdd(v)
case OpMIPSLoweredAtomicStore32:
return rewriteValueMIPS_OpMIPSLoweredAtomicStore32(v)
case OpMIPSLoweredPanicBoundsRC:
return rewriteValueMIPS_OpMIPSLoweredPanicBoundsRC(v)
case OpMIPSLoweredPanicBoundsRR:
return rewriteValueMIPS_OpMIPSLoweredPanicBoundsRR(v)
case OpMIPSLoweredPanicExtendRC:
return rewriteValueMIPS_OpMIPSLoweredPanicExtendRC(v)
case OpMIPSLoweredPanicExtendRR:
return rewriteValueMIPS_OpMIPSLoweredPanicExtendRR(v)
case OpMIPSMOVBUload:
return rewriteValueMIPS_OpMIPSMOVBUload(v)
case OpMIPSMOVBUreg:
@@ -447,9 +455,11 @@ func rewriteValueMIPS(v *Value) bool {
v.Op = OpMIPSOR
return true
case OpPanicBounds:
return rewriteValueMIPS_OpPanicBounds(v)
v.Op = OpMIPSLoweredPanicBoundsRR
return true
case OpPanicExtend:
return rewriteValueMIPS_OpPanicExtend(v)
v.Op = OpMIPSLoweredPanicExtendRR
return true
case OpPubBarrier:
v.Op = OpMIPSLoweredPubBarrier
return true
@@ -2435,6 +2445,135 @@ func rewriteValueMIPS_OpMIPSLoweredAtomicStore32(v *Value) bool {
}
return false
}
func rewriteValueMIPS_OpMIPSLoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVWconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(c), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpMIPSMOVWconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
mem := v_1
v.reset(OpMIPSLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(c), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueMIPS_OpMIPSLoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVWconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpMIPSMOVWconst {
break
}
c := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(OpMIPSLoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVWconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(c)}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpMIPSMOVWconst {
break
}
c := auxIntToInt32(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpMIPSLoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueMIPS_OpMIPSLoweredPanicExtendRC(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRC [kind] {p} (MOVWconst [hi]) (MOVWconst [lo]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:int64(hi)<<32+int64(uint32(lo)), Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpMIPSMOVWconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != OpMIPSMOVWconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
mem := v_2
v.reset(OpMIPSLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: int64(hi)<<32 + int64(uint32(lo)), Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueMIPS_OpMIPSLoweredPanicExtendRR(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicExtendRR [kind] hi lo (MOVWconst [c]) mem)
// result: (LoweredPanicExtendRC [kind] hi lo {PanicBoundsC{C:int64(c)}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
if v_2.Op != OpMIPSMOVWconst {
break
}
c := auxIntToInt32(v_2.AuxInt)
mem := v_3
v.reset(OpMIPSLoweredPanicExtendRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(c)})
v.AddArg3(hi, lo, mem)
return true
}
// match: (LoweredPanicExtendRR [kind] (MOVWconst [hi]) (MOVWconst [lo]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:int64(hi)<<32 + int64(uint32(lo))}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpMIPSMOVWconst {
break
}
hi := auxIntToInt32(v_0.AuxInt)
if v_1.Op != OpMIPSMOVWconst {
break
}
lo := auxIntToInt32(v_1.AuxInt)
y := v_2
mem := v_3
v.reset(OpMIPSLoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: int64(hi)<<32 + int64(uint32(lo))})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueMIPS_OpMIPSMOVBUload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -4058,8 +4197,8 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
break
}
// match: (MUL (MOVWconst [c]) x )
// cond: isPowerOfTwo(int64(uint32(c)))
// result: (SLLconst [int32(log2uint32(int64(c)))] x)
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SLLconst [int32(log32u(uint32(c)))] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpMIPSMOVWconst {
@@ -4067,11 +4206,11 @@ func rewriteValueMIPS_OpMIPSMUL(v *Value) bool {
}
c := auxIntToInt32(v_0.AuxInt)
x := v_1
if !(isPowerOfTwo(int64(uint32(c)))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
continue
}
v.reset(OpMIPSSLLconst)
v.AuxInt = int32ToAuxInt(int32(log2uint32(int64(c))))
v.AuxInt = int32ToAuxInt(int32(log32u(uint32(c))))
v.AddArg(x)
return true
}
@@ -5586,118 +5725,6 @@ func rewriteValueMIPS_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueMIPS_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpMIPSLoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpMIPSLoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpMIPSLoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueMIPS_OpPanicExtend(v *Value) bool {
v_3 := v.Args[3]
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicExtendA [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpMIPSLoweredPanicExtendA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicExtendB [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpMIPSLoweredPanicExtendB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
// match: (PanicExtend [kind] hi lo y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicExtendC [kind] hi lo y mem)
for {
kind := auxIntToInt64(v.AuxInt)
hi := v_0
lo := v_1
y := v_2
mem := v_3
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpMIPSLoweredPanicExtendC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg4(hi, lo, y, mem)
return true
}
return false
}
func rewriteValueMIPS_OpRotateLeft16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -6611,8 +6638,8 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
break
}
// match: (Select0 (MULTU (MOVWconst [c]) x ))
// cond: isPowerOfTwo(int64(uint32(c)))
// result: (SRLconst [int32(32-log2uint32(int64(c)))] x)
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SRLconst [int32(32-log32u(uint32(c)))] x)
for {
if v_0.Op != OpMIPSMULTU {
break
@@ -6626,11 +6653,11 @@ func rewriteValueMIPS_OpSelect0(v *Value) bool {
}
c := auxIntToInt32(v_0_0.AuxInt)
x := v_0_1
if !(isPowerOfTwo(int64(uint32(c)))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
continue
}
v.reset(OpMIPSSRLconst)
v.AuxInt = int32ToAuxInt(int32(32 - log2uint32(int64(c))))
v.AuxInt = int32ToAuxInt(int32(32 - log32u(uint32(c))))
v.AddArg(x)
return true
}
@@ -6807,8 +6834,8 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
break
}
// match: (Select1 (MULTU (MOVWconst [c]) x ))
// cond: isPowerOfTwo(int64(uint32(c)))
// result: (SLLconst [int32(log2uint32(int64(c)))] x)
// cond: isUnsignedPowerOfTwo(uint32(c))
// result: (SLLconst [int32(log32u(uint32(c)))] x)
for {
if v_0.Op != OpMIPSMULTU {
break
@@ -6822,11 +6849,11 @@ func rewriteValueMIPS_OpSelect1(v *Value) bool {
}
c := auxIntToInt32(v_0_0.AuxInt)
x := v_0_1
if !(isPowerOfTwo(int64(uint32(c)))) {
if !(isUnsignedPowerOfTwo(uint32(c))) {
continue
}
v.reset(OpMIPSSLLconst)
v.AuxInt = int32ToAuxInt(int32(log2uint32(int64(c))))
v.AuxInt = int32ToAuxInt(int32(log32u(uint32(c))))
v.AddArg(x)
return true
}
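// The MUL and MULTU rules in this file now use the unsigned helpers
// isUnsignedPowerOfTwo and log32u on uint32 directly, replacing the
// removed log2uint32 helper and its int64 round-trip through
// isPowerOfTwo.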


@@ -316,6 +316,12 @@ func rewriteValueMIPS64(v *Value) bool {
return rewriteValueMIPS64_OpMIPS64LoweredAtomicStore32(v)
case OpMIPS64LoweredAtomicStore64:
return rewriteValueMIPS64_OpMIPS64LoweredAtomicStore64(v)
case OpMIPS64LoweredPanicBoundsCR:
return rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsCR(v)
case OpMIPS64LoweredPanicBoundsRC:
return rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsRC(v)
case OpMIPS64LoweredPanicBoundsRR:
return rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsRR(v)
case OpMIPS64MOVBUload:
return rewriteValueMIPS64_OpMIPS64MOVBUload(v)
case OpMIPS64MOVBUreg:
@@ -501,7 +507,8 @@ func rewriteValueMIPS64(v *Value) bool {
v.Op = OpMIPS64OR
return true
case OpPanicBounds:
return rewriteValueMIPS64_OpPanicBounds(v)
v.Op = OpMIPS64LoweredPanicBoundsRR
return true
case OpPubBarrier:
v.Op = OpMIPS64LoweredPubBarrier
return true
@@ -2757,6 +2764,86 @@ func rewriteValueMIPS64_OpMIPS64LoweredAtomicStore64(v *Value) bool {
}
return false
}
func rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpMIPS64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpMIPS64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpMIPS64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpMIPS64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueMIPS64_OpMIPS64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVVconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpMIPS64MOVVconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpMIPS64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVVconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpMIPS64MOVVconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpMIPS64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueMIPS64_OpMIPS64MOVBUload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@@ -6364,60 +6451,6 @@ func rewriteValueMIPS64_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueMIPS64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpMIPS64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpMIPS64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpMIPS64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueMIPS64_OpRotateLeft16(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]


@@ -486,7 +486,8 @@ func rewriteValueRISCV64(v *Value) bool {
v.Op = OpRISCV64OR
return true
case OpPanicBounds:
return rewriteValueRISCV64_OpPanicBounds(v)
v.Op = OpRISCV64LoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValueRISCV64_OpPopCount16(v)
case OpPopCount32:
@@ -532,6 +533,12 @@ func rewriteValueRISCV64(v *Value) bool {
return rewriteValueRISCV64_OpRISCV64FSUBD(v)
case OpRISCV64FSUBS:
return rewriteValueRISCV64_OpRISCV64FSUBS(v)
case OpRISCV64LoweredPanicBoundsCR:
return rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsCR(v)
case OpRISCV64LoweredPanicBoundsRC:
return rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsRC(v)
case OpRISCV64LoweredPanicBoundsRR:
return rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsRR(v)
case OpRISCV64MOVBUload:
return rewriteValueRISCV64_OpRISCV64MOVBUload(v)
case OpRISCV64MOVBUreg:
@@ -3416,60 +3423,6 @@ func rewriteValueRISCV64_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueRISCV64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpRISCV64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@@ -4239,6 +4192,86 @@ func rewriteValueRISCV64_OpRISCV64FSUBS(v *Value) bool {
}
return false
}
func rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpRISCV64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpRISCV64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpRISCV64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpRISCV64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpRISCV64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpRISCV64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpRISCV64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpRISCV64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueRISCV64_OpRISCV64MOVBUload(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]


@@ -368,6 +368,18 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpLsh8x64(v)
case OpLsh8x8:
return rewriteValueS390X_OpLsh8x8(v)
case OpMax32F:
v.Op = OpS390XWFMAXSB
return true
case OpMax64F:
v.Op = OpS390XWFMAXDB
return true
case OpMin32F:
v.Op = OpS390XWFMINSB
return true
case OpMin64F:
v.Op = OpS390XWFMINDB
return true
case OpMod16:
return rewriteValueS390X_OpMod16(v)
case OpMod16u:
@@ -465,7 +477,8 @@ func rewriteValueS390X(v *Value) bool {
v.Op = OpS390XORW
return true
case OpPanicBounds:
return rewriteValueS390X_OpPanicBounds(v)
v.Op = OpS390XLoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValueS390X_OpPopCount16(v)
case OpPopCount32:
@@ -632,6 +645,12 @@ func rewriteValueS390X(v *Value) bool {
return rewriteValueS390X_OpS390XLTDBR(v)
case OpS390XLTEBR:
return rewriteValueS390X_OpS390XLTEBR(v)
case OpS390XLoweredPanicBoundsCR:
return rewriteValueS390X_OpS390XLoweredPanicBoundsCR(v)
case OpS390XLoweredPanicBoundsRC:
return rewriteValueS390X_OpS390XLoweredPanicBoundsRC(v)
case OpS390XLoweredPanicBoundsRR:
return rewriteValueS390X_OpS390XLoweredPanicBoundsRR(v)
case OpS390XLoweredRound32F:
return rewriteValueS390X_OpS390XLoweredRound32F(v)
case OpS390XLoweredRound64F:
@@ -3959,60 +3978,6 @@ func rewriteValueS390X_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueS390X_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpS390XLoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpS390XLoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpS390XLoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueS390X_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block
@@ -8135,6 +8100,86 @@ func rewriteValueS390X_OpS390XLTEBR(v *Value) bool {
}
return false
}
func rewriteValueS390X_OpS390XLoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpS390XMOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpS390XLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueS390X_OpS390XLoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
if v_0.Op != OpS390XMOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
v.reset(OpS390XLoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
func rewriteValueS390X_OpS390XLoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpS390XMOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpS390XLoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpS390XMOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpS390XLoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueS390X_OpS390XLoweredRound32F(v *Value) bool {
v_0 := v.Args[0]
// match: (LoweredRound32F x:(FMOVSconst))
