[dev.boringcrypto] all: merge master into dev.boringcrypto

Change-Id: I52009bf809dda4fbcff03aa82d0ea8aa2a978fa2
Chressie Himpel 2022-04-20 16:56:54 +02:00
commit e07d63964b
801 changed files with 20643 additions and 7106 deletions


@ -1435,6 +1435,7 @@ Wei Guangjing <vcc.163@gmail.com>
Weichao Tang <tevic.tt@gmail.com>
Weixie Cui <cuiweixie@gmail.com> <523516579@qq.com>
Wembley G. Leach, Jr <wembley.gl@gmail.com>
Wen Yang <yangwen.yw@gmail.com>
Will Faught <will.faught@gmail.com>
Will Storey <will@summercat.com>
Willem van der Schyff <willemvds@gmail.com>


@ -2746,6 +2746,7 @@ Weichao Tang <tevic.tt@gmail.com>
Weilu Jia <optix2000@gmail.com>
Weixie Cui <cuiweixie@gmail.com> <523516579@qq.com>
Wembley G. Leach, Jr <wembley.gl@gmail.com>
Wen Yang <yangwen.yw@gmail.com>
Wenlei (Frank) He <wlhe@google.com>
Wenzel Lowe <lowewenzel@gmail.com>
Wil Selwood <wselwood@gmail.com>

api/next/35044.txt Normal file

@ -0,0 +1 @@
pkg crypto/x509, method (*CertPool) Clone() *CertPool #35044
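For illustration (not part of the commit), a minimal sketch of the new method; copying the cached system pool without mutating it is the motivating use case:

package main

import (
	"crypto/x509"
	"log"
)

func main() {
	roots, err := x509.SystemCertPool()
	if err != nil {
		log.Fatal(err)
	}
	// Clone returns an independent copy; certificates added to
	// the copy do not leak into the shared system pool.
	pool := roots.Clone()
	_ = pool
}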

api/next/42710.txt Normal file

@ -0,0 +1,2 @@
pkg hash/maphash, func Bytes(Seed, []uint8) uint64 #42710
pkg hash/maphash, func String(Seed, string) uint64 #42710
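For illustration (not part of the commit), the two new one-shot helpers hash a byte slice or string with a given seed:

package main

import (
	"fmt"
	"hash/maphash"
)

func main() {
	seed := maphash.MakeSeed()
	b := maphash.Bytes(seed, []byte("hello"))
	s := maphash.String(seed, "hello")
	// The same input hashed with the same seed yields the same value.
	fmt.Println(b == s) // true
}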

api/next/50340.txt Normal file

@ -0,0 +1 @@
pkg sort, func Find(int, func(int) int) (int, bool) #50340
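For illustration (not part of the commit), Find binary-searches for the smallest index at which the comparison function is <= 0 and reports whether that is an exact match:

package main

import (
	"fmt"
	"sort"
	"strings"
)

func main() {
	list := []string{"ant", "bee", "cat", "dog"}
	target := "cat"
	i, found := sort.Find(len(list), func(i int) int {
		return strings.Compare(target, list[i])
	})
	fmt.Println(i, found) // 2 true
}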

api/next/50674.txt Normal file

@ -0,0 +1,9 @@
pkg crypto/x509, func ParseRevocationList([]uint8) (*RevocationList, error) #50674
pkg crypto/x509, method (*RevocationList) CheckSignatureFrom(*Certificate) error #50674
pkg crypto/x509, type RevocationList struct, AuthorityKeyId []uint8 #50674
pkg crypto/x509, type RevocationList struct, Extensions []pkix.Extension #50674
pkg crypto/x509, type RevocationList struct, Issuer pkix.Name #50674
pkg crypto/x509, type RevocationList struct, Raw []uint8 #50674
pkg crypto/x509, type RevocationList struct, RawIssuer []uint8 #50674
pkg crypto/x509, type RevocationList struct, RawTBSRevocationList []uint8 #50674
pkg crypto/x509, type RevocationList struct, Signature []uint8 #50674
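For illustration (not part of the commit), a sketch of parsing a CRL; the file name is hypothetical and the input must be DER-encoded:

package main

import (
	"crypto/x509"
	"fmt"
	"os"
)

func main() {
	der, err := os.ReadFile("crl.der") // hypothetical input file
	if err != nil {
		panic(err)
	}
	rl, err := x509.ParseRevocationList(der)
	if err != nil {
		panic(err)
	}
	fmt.Println(rl.Issuer)
	// With the issuing certificate in hand, the signature can be verified:
	//	err = rl.CheckSignatureFrom(issuerCert)
}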

api/next/51082.txt Normal file

@ -0,0 +1,61 @@
pkg go/doc, method (*Package) HTML(string) []uint8 #51082
pkg go/doc, method (*Package) Markdown(string) []uint8 #51082
pkg go/doc, method (*Package) Parser() *comment.Parser #51082
pkg go/doc, method (*Package) Printer() *comment.Printer #51082
pkg go/doc, method (*Package) Synopsis(string) string #51082
pkg go/doc, method (*Package) Text(string) []uint8 #51082
pkg go/doc/comment, func DefaultLookupPackage(string) (string, bool) #51082
pkg go/doc/comment, method (*DocLink) DefaultURL(string) string #51082
pkg go/doc/comment, method (*Heading) DefaultID() string #51082
pkg go/doc/comment, method (*List) BlankBefore() bool #51082
pkg go/doc/comment, method (*List) BlankBetween() bool #51082
pkg go/doc/comment, method (*Parser) Parse(string) *Doc #51082
pkg go/doc/comment, method (*Printer) Comment(*Doc) []uint8 #51082
pkg go/doc/comment, method (*Printer) HTML(*Doc) []uint8 #51082
pkg go/doc/comment, method (*Printer) Markdown(*Doc) []uint8 #51082
pkg go/doc/comment, method (*Printer) Text(*Doc) []uint8 #51082
pkg go/doc/comment, type Block interface, unexported methods #51082
pkg go/doc/comment, type Code struct #51082
pkg go/doc/comment, type Code struct, Text string #51082
pkg go/doc/comment, type Doc struct #51082
pkg go/doc/comment, type Doc struct, Content []Block #51082
pkg go/doc/comment, type Doc struct, Links []*LinkDef #51082
pkg go/doc/comment, type DocLink struct #51082
pkg go/doc/comment, type DocLink struct, ImportPath string #51082
pkg go/doc/comment, type DocLink struct, Name string #51082
pkg go/doc/comment, type DocLink struct, Recv string #51082
pkg go/doc/comment, type DocLink struct, Text []Text #51082
pkg go/doc/comment, type Heading struct #51082
pkg go/doc/comment, type Heading struct, Text []Text #51082
pkg go/doc/comment, type Italic string #51082
pkg go/doc/comment, type Link struct #51082
pkg go/doc/comment, type Link struct, Auto bool #51082
pkg go/doc/comment, type Link struct, Text []Text #51082
pkg go/doc/comment, type Link struct, URL string #51082
pkg go/doc/comment, type LinkDef struct #51082
pkg go/doc/comment, type LinkDef struct, Text string #51082
pkg go/doc/comment, type LinkDef struct, URL string #51082
pkg go/doc/comment, type LinkDef struct, Used bool #51082
pkg go/doc/comment, type List struct #51082
pkg go/doc/comment, type List struct, ForceBlankBefore bool #51082
pkg go/doc/comment, type List struct, ForceBlankBetween bool #51082
pkg go/doc/comment, type List struct, Items []*ListItem #51082
pkg go/doc/comment, type ListItem struct #51082
pkg go/doc/comment, type ListItem struct, Content []Block #51082
pkg go/doc/comment, type ListItem struct, Number string #51082
pkg go/doc/comment, type Paragraph struct #51082
pkg go/doc/comment, type Paragraph struct, Text []Text #51082
pkg go/doc/comment, type Parser struct #51082
pkg go/doc/comment, type Parser struct, LookupPackage func(string) (string, bool) #51082
pkg go/doc/comment, type Parser struct, LookupSym func(string, string) bool #51082
pkg go/doc/comment, type Parser struct, Words map[string]string #51082
pkg go/doc/comment, type Plain string #51082
pkg go/doc/comment, type Printer struct #51082
pkg go/doc/comment, type Printer struct, DocLinkBaseURL string #51082
pkg go/doc/comment, type Printer struct, DocLinkURL func(*DocLink) string #51082
pkg go/doc/comment, type Printer struct, HeadingID func(*Heading) string #51082
pkg go/doc/comment, type Printer struct, HeadingLevel int #51082
pkg go/doc/comment, type Printer struct, TextCodePrefix string #51082
pkg go/doc/comment, type Printer struct, TextPrefix string #51082
pkg go/doc/comment, type Printer struct, TextWidth int #51082
pkg go/doc/comment, type Text interface, unexported methods #51082
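For illustration (not part of the commit), a sketch of the new doc comment pipeline: a Parser turns comment text into a Doc, and a Printer renders it (zero values of both are usable):

package main

import (
	"fmt"
	"go/doc/comment"
)

func main() {
	var p comment.Parser
	doc := p.Parse("Hello, world. This is a doc comment.\n\nSee the [testing] package for more.\n")
	var pr comment.Printer
	// The same Doc can also be rendered with pr.HTML or pr.Text.
	fmt.Printf("%s", pr.Markdown(doc))
}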

api/next/51644.txt Normal file

@ -0,0 +1,2 @@
pkg encoding/binary, func AppendUvarint([]uint8, uint64) []uint8 #51644
pkg encoding/binary, func AppendVarint([]uint8, int64) []uint8 #51644
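For illustration (not part of the commit), the new append-style varint encoders:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	buf := make([]byte, 0, 16)
	buf = binary.AppendUvarint(buf, 300)
	buf = binary.AppendVarint(buf, -3) // zigzag-encoded
	fmt.Printf("% x\n", buf)           // ac 02 05
}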


@ -32,7 +32,22 @@ Do not send CLs removing the interior tags from such phrases.
</p>
<h3 id="go-command">Go command</h3>
<p>
TODO: complete this section, or delete if not needed
TODO: complete this section.
</p>
<!-- https://go.dev/issue/51461 -->
<p>
The <code>-trimpath</code> flag, if set, is now included in the build settings
stamped into Go binaries by <code>go</code> <code>build</code>, and can be
examined using
<a href="https://pkg.go.dev/cmd/go#hdr-Print_Go_version"><code>go</code> <code>version</code> <code>-m</code></a>
or <a href="https://pkg.go.dev/runtime/debug#ReadBuildInfo"><code>debug.ReadBuildInfo</code></a>.
</p>
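For illustration (not part of the commit), a sketch of reading the stamped setting back at run time; the setting key is assumed to match the flag name:

package main

import (
	"fmt"
	"runtime/debug"
)

func main() {
	info, ok := debug.ReadBuildInfo()
	if !ok {
		return // binary built without build info
	}
	for _, s := range info.Settings {
		if s.Key == "-trimpath" {
			fmt.Println("trimpath:", s.Value)
		}
	}
}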
<p>
<code>go</code> <code>generate</code> now sets the <code>GOROOT</code>
environment variable explicitly in the generator's environment, so that
generators can locate the correct <code>GOROOT</code> even if built
with <code>-trimpath</code>.
</p>
<h4 id="go-unix">New <code>unix</code> build constraint</h4>
@ -76,6 +91,19 @@ Do not send CLs removing the interior tags from such phrases.
<p>
TODO: complete this section
</p>
<dl id="image/draw"><dt><a href="/pkg/image/draw/">image/draw</a></dt>
<dd>
<p><!-- CL 396795 -->
<code>Draw</code> with the <code>Src</code> operator preserves
non-premultiplied-alpha colors when destination and source images are
both <code>*image.NRGBA</code> (or both <code>*image.NRGBA64</code>).
This reverts a behavior change accidentally introduced by a Go 1.18
library optimization, to match the behavior in Go 1.17 and earlier.
</p>
</dd>
</dl><!-- image/draw -->
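For illustration (not part of the commit), the restored Go 1.17 behavior in a minimal sketch:

package main

import (
	"fmt"
	"image"
	"image/color"
	"image/draw"
)

func main() {
	src := image.NewNRGBA(image.Rect(0, 0, 1, 1))
	src.SetNRGBA(0, 0, color.NRGBA{R: 200, G: 100, B: 50, A: 128})
	dst := image.NewNRGBA(image.Rect(0, 0, 1, 1))
	// Src operator, NRGBA on both sides: the non-premultiplied
	// color should pass through unchanged.
	draw.Draw(dst, dst.Bounds(), src, image.Point{}, draw.Src)
	fmt.Println(dst.NRGBAAt(0, 0)) // {200 100 50 128}
}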
<dl id="net"><dt><a href="/pkg/net/">net</a></dt>
<dd>
<p><!-- CL 386016 -->
@ -93,9 +121,9 @@ Do not send CLs removing the interior tags from such phrases.
<p><!-- CL 396877 -->
When a net package function or method returns an "I/O timeout"
error, the error will now satisfy <code>errors.Is(err,
context.Canceled)</code>. When a net package function returns
an "operation was canceled" error, the error will now satisfy
<code>errors.Is(err, context.DeadlineExceeded)</code>.
context.DeadlineExceeded)</code>. When a net package function
returns an "operation was canceled" error, the error will now
satisfy <code>errors.Is(err, context.Canceled)</code>.
These changes are intended to make it easier for code to test
for cases in which a context cancelation or timeout causes a net
package function or method to return an error, while preserving
@ -104,6 +132,17 @@ Do not send CLs removing the interior tags from such phrases.
</dd>
</dl><!-- net -->
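For illustration (not part of the commit), a sketch of the corrected mapping; a dial that times out is expected to satisfy context.DeadlineExceeded:

package main

import (
	"context"
	"errors"
	"fmt"
	"net"
	"time"
)

func main() {
	d := net.Dialer{Timeout: time.Nanosecond}
	_, err := d.Dial("tcp", "example.com:80")
	// Under the Go 1.19 behavior described above, a timeout error
	// here satisfies errors.Is(err, context.DeadlineExceeded).
	fmt.Println(errors.Is(err, context.DeadlineExceeded))
}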
<dl id="runtime"><dt><a href="/pkg/runtime/">runtime</a></dt>
<dd>
<p><!-- https://go.dev/issue/51461 -->
The <code>GOROOT</code> function now returns the empty string
(instead of <code>"go"</code>) when the binary was built with
the <code>-trimpath</code> flag set and the <code>GOROOT</code>
variable is not set in the process environment.
</p>
</dd>
</dl><!-- runtime -->
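For illustration (not part of the commit):

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// In a binary built with -trimpath and run without GOROOT set
	// in the environment, this prints an empty string under Go 1.19.
	fmt.Println("GOROOT:", runtime.GOROOT())
}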
<dl id="strconv"><dt><a href="/pkg/strconv/">strconv</a></dt>
<dd>
<p><!-- CL 397255 -->


@ -333,10 +333,9 @@ func (z *Int) Abs(x *Int) *Int {
// CmpInt compares x and y. The result is
//
// -1 if x < y
// 0 if x == y
// +1 if x > y
//
// -1 if x < y
// 0 if x == y
// +1 if x > y
func CmpInt(x, y *Int) int {
x.doinit()
y.doinit()


@ -5,6 +5,7 @@
package cshared_test
import (
"bufio"
"bytes"
"debug/elf"
"debug/pe"
@ -838,3 +839,51 @@ func TestGo2C2Go(t *testing.T) {
run(t, goenv, "go", "build", "-o", bin, "./go2c2go/m2")
runExe(t, runenv, bin)
}
func TestIssue36233(t *testing.T) {
t.Parallel()
// Test that the export header uses GoComplex64 and GoComplex128
// for complex types.
tmpdir, err := os.MkdirTemp("", "cshared-TestIssue36233")
if err != nil {
t.Fatal(err)
}
defer os.RemoveAll(tmpdir)
const exportHeader = "issue36233.h"
run(t, nil, "go", "tool", "cgo", "-exportheader", exportHeader, "-objdir", tmpdir, "./issue36233/issue36233.go")
data, err := os.ReadFile(exportHeader)
if err != nil {
t.Fatal(err)
}
funcs := []struct{ name, signature string }{
{"exportComplex64", "GoComplex64 exportComplex64(GoComplex64 v)"},
{"exportComplex128", "GoComplex128 exportComplex128(GoComplex128 v)"},
{"exportComplexfloat", "GoComplex64 exportComplexfloat(GoComplex64 v)"},
{"exportComplexdouble", "GoComplex128 exportComplexdouble(GoComplex128 v)"},
}
scanner := bufio.NewScanner(bytes.NewReader(data))
var found int
for scanner.Scan() {
b := scanner.Bytes()
for _, fn := range funcs {
if bytes.Contains(b, []byte(fn.name)) {
found++
if !bytes.Contains(b, []byte(fn.signature)) {
t.Errorf("function signature mismatch; got %q, want %q", b, fn.signature)
}
}
}
}
if err = scanner.Err(); err != nil {
t.Errorf("scanner encountered error: %v", err)
}
if found != len(funcs) {
t.Error("missing functions")
}
}


@ -0,0 +1,29 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
// #include <complex.h>
import "C"
//export exportComplex64
func exportComplex64(v complex64) complex64 {
return v
}
//export exportComplex128
func exportComplex128(v complex128) complex128 {
return v
}
//export exportComplexfloat
func exportComplexfloat(v C.complexfloat) C.complexfloat {
return v
}
//export exportComplexdouble
func exportComplexdouble(v C.complexdouble) C.complexdouble {
return v
}
func main() {}


@ -13,9 +13,11 @@
// binary.
//
// This script requires that three environment variables be set:
// GOIOS_DEV_ID: The codesigning developer id or certificate identifier
// GOIOS_APP_ID: The provisioning app id prefix. Must support wildcard app ids.
// GOIOS_TEAM_ID: The team id that owns the app id prefix.
//
// GOIOS_DEV_ID: The codesigning developer id or certificate identifier
// GOIOS_APP_ID: The provisioning app id prefix. Must support wildcard app ids.
// GOIOS_TEAM_ID: The team id that owns the app id prefix.
//
// $GOROOT/misc/ios contains a script, detect.go, that attempts to autodetect these.
package main


@ -221,9 +221,11 @@ func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
// that the file has no data in it, which is rather odd.
//
// As an example, if the underlying raw file contains the 10-byte data:
//
// var compactFile = "abcdefgh"
//
// And the sparse map has the following entries:
//
// var spd sparseDatas = []sparseEntry{
// {Offset: 2, Length: 5}, // Data fragment for 2..6
// {Offset: 18, Length: 3}, // Data fragment for 18..20
@ -235,6 +237,7 @@ func (s sparseEntry) endOffset() int64 { return s.Offset + s.Length }
// }
//
// Then the content of the resulting sparse file with a Header.Size of 25 is:
//
// var sparseFile = "\x00"*2 + "abcde" + "\x00"*11 + "fgh" + "\x00"*4
type (
sparseDatas []sparseEntry
@ -293,9 +296,9 @@ func alignSparseEntries(src []sparseEntry, size int64) []sparseEntry {
// The input must have been already validated.
//
// This function mutates src and returns a normalized map where:
// * adjacent fragments are coalesced together
// * only the last fragment may be empty
// * the endOffset of the last fragment is the total size
// - adjacent fragments are coalesced together
// - only the last fragment may be empty
// - the endOffset of the last fragment is the total size
func invertSparseEntries(src []sparseEntry, size int64) []sparseEntry {
dst := src[:0]
var pre sparseEntry


@ -336,9 +336,9 @@ func parsePAX(r io.Reader) (map[string]string, error) {
// header in case further processing is required.
//
// The err will be set to io.EOF only when one of the following occurs:
// * Exactly 0 bytes are read and EOF is hit.
// * Exactly 1 block of zeros is read and EOF is hit.
// * At least 2 blocks of zeros are read.
// - Exactly 0 bytes are read and EOF is hit.
// - Exactly 1 block of zeros is read and EOF is hit.
// - At least 2 blocks of zeros are read.
func (tr *Reader) readHeader() (*Header, *block, error) {
// Two blocks of zero bytes marks the end of the archive.
if _, err := io.ReadFull(tr.r, tr.blk[:]); err != nil {


@ -306,6 +306,7 @@ func formatPAXRecord(k, v string) (string, error) {
// validPAXRecord reports whether the key-value pair is valid where each
// record is formatted as:
//
// "%d %s=%s\n" % (size, key, value)
//
// Keys and values should be UTF-8, but the number of bad writers out there


@ -229,6 +229,9 @@ func (r *checksumReader) Read(b []byte) (n int, err error) {
n, err = r.rc.Read(b)
r.hash.Write(b[:n])
r.nread += uint64(n)
if r.nread > r.f.UncompressedSize64 {
return 0, ErrFormat
}
if err == nil {
return
}


@ -1407,3 +1407,30 @@ func TestCVE202141772(t *testing.T) {
t.Errorf("Inconsistent name in info entry: %v", name)
}
}
func TestUnderSize(t *testing.T) {
z, err := OpenReader("testdata/readme.zip")
if err != nil {
t.Fatal(err)
}
defer z.Close()
for _, f := range z.File {
f.UncompressedSize64 = 1
}
for _, f := range z.File {
t.Run(f.Name, func(t *testing.T) {
rd, err := f.Open()
if err != nil {
t.Fatal(err)
}
defer rd.Close()
_, err = io.Copy(io.Discard, rd)
if err != ErrFormat {
t.Fatalf("Error mismatch\n\tGot: %v\n\tWant: %v", err, ErrFormat)
}
})
}
}


@ -137,9 +137,12 @@ type ComplexType complex64
// new elements. If it does not, a new underlying array will be allocated.
// Append returns the updated slice. It is therefore necessary to store the
// result of append, often in the variable holding the slice itself:
//
// slice = append(slice, elem1, elem2)
// slice = append(slice, anotherSlice...)
//
// As a special case, it is legal to append a string to a byte slice, like this:
//
// slice = append([]byte("hello "), "world"...)
func append(slice []Type, elems ...Type) []Type
@ -156,24 +159,28 @@ func copy(dst, src []Type) int
func delete(m map[Type]Type1, key Type)
// The len built-in function returns the length of v, according to its type:
//
// Array: the number of elements in v.
// Pointer to array: the number of elements in *v (even if v is nil).
// Slice, or map: the number of elements in v; if v is nil, len(v) is zero.
// String: the number of bytes in v.
// Channel: the number of elements queued (unread) in the channel buffer;
// if v is nil, len(v) is zero.
//
// For some arguments, such as a string literal or a simple array expression, the
// result can be a constant. See the Go language specification's "Length and
// capacity" section for details.
func len(v Type) int
// The cap built-in function returns the capacity of v, according to its type:
//
// Array: the number of elements in v (same as len(v)).
// Pointer to array: the number of elements in *v (same as len(v)).
// Slice: the maximum length the slice can reach when resliced;
// if v is nil, cap(v) is zero.
// Channel: the channel buffer capacity, in units of elements;
// if v is nil, cap(v) is zero.
//
// For some arguments, such as a simple array expression, the result can be a
// constant. See the Go language specification's "Length and capacity" section for
// details.
@ -184,6 +191,7 @@ func cap(v Type) int
// value. Unlike new, make's return type is the same as the type of its
// argument, not a pointer to it. The specification of the result depends on
// the type:
//
// Slice: The size specifies the length. The capacity of the slice is
// equal to its length. A second integer argument may be provided to
// specify a different capacity; it must be no smaller than the
@ -225,7 +233,9 @@ func imag(c ComplexType) FloatType
// the last sent value is received. After the last value has been received
// from a closed channel c, any receive from c will succeed without
// blocking, returning the zero value for the channel element. The form
//
// x, ok := <-c
//
// will also set ok to false for a closed channel.
func close(c chan<- Type)


@ -30,7 +30,7 @@ func Compare(a, b []byte) int {
// explode splits s into a slice of UTF-8 sequences, one per Unicode code point (still slices of bytes),
// up to a maximum of n byte slices. Invalid UTF-8 sequences are chopped into individual bytes.
func explode(s []byte, n int) [][]byte {
if n <= 0 {
if n <= 0 || n > len(s) {
n = len(s)
}
a := make([][]byte, n)
@ -348,6 +348,9 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
if n < 0 {
n = Count(s, sep) + 1
}
if n > len(s)+1 {
n = len(s) + 1
}
a := make([][]byte, n)
n--
@ -369,9 +372,10 @@ func genSplit(s, sep []byte, sepSave, n int) [][]byte {
// the subslices between those separators.
// If sep is empty, SplitN splits after each UTF-8 sequence.
// The count determines the number of subslices to return:
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
//
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
//
// To split around the first instance of a separator, see Cut.
func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
@ -380,9 +384,10 @@ func SplitN(s, sep []byte, n int) [][]byte { return genSplit(s, sep, 0, n) }
// returns a slice of those subslices.
// If sep is empty, SplitAfterN splits after each UTF-8 sequence.
// The count determines the number of subslices to return:
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
//
// n > 0: at most n subslices; the last subslice will be the unsplit remainder.
// n == 0: the result is nil (zero subslices)
// n < 0: all subslices
func SplitAfterN(s, sep []byte, n int) [][]byte {
return genSplit(s, sep, len(sep), n)
}
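For illustration (not part of the commit), the count rules spelled out above:

package main

import (
	"bytes"
	"fmt"
)

func main() {
	s := []byte("a,b,c")
	sep := []byte(",")
	fmt.Printf("%q\n", bytes.SplitN(s, sep, 2))  // ["a" "b,c"]
	fmt.Printf("%q\n", bytes.SplitN(s, sep, 0))  // []
	fmt.Printf("%q\n", bytes.SplitN(s, sep, -1)) // ["a" "b" "c"]
}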
@ -1139,7 +1144,7 @@ func ReplaceAll(s, old, new []byte) []byte {
}
// EqualFold reports whether s and t, interpreted as UTF-8 strings,
// are equal under Unicode case-folding, which is a more general
// are equal under simple Unicode case-folding, which is a more general
// form of case-insensitivity.
func EqualFold(s, t []byte) bool {
for len(s) != 0 && len(t) != 0 {


@ -8,6 +8,7 @@ import (
. "bytes"
"fmt"
"internal/testenv"
"math"
"math/rand"
"reflect"
"strings"
@ -723,6 +724,7 @@ var splittests = []SplitTest{
{"1 2", " ", 3, []string{"1", "2"}},
{"123", "", 2, []string{"1", "23"}},
{"123", "", 17, []string{"1", "2", "3"}},
{"bT", "T", math.MaxInt / 4, []string{"b", ""}},
}
func TestSplit(t *testing.T) {


@ -6,6 +6,7 @@
// just enough to support pprof.
//
// Usage:
//
// go tool addr2line binary
//
// Addr2line reads hexadecimal addresses, one per line and with optional 0x prefix,


@ -3,11 +3,11 @@
// license that can be found in the LICENSE file.
/*
Asm, typically invoked as ``go tool asm'', assembles the source file into an object
Asm, typically invoked as go tool asm, assembles the source file into an object
file named for the basename of the argument source file with a .o suffix. The
object file can then be combined with other objects into a package archive.
Command Line
# Command Line
Usage:


@ -162,7 +162,7 @@ func (p *Parser) nextToken() lex.ScanToken {
// line consumes a single assembly line from p.lex of the form
//
// {label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
// {label:} WORD[.cond] [ arg {, arg} ] (';' | '\n')
//
// It adds any labels to p.pendingLabels and returns the word, cond,
// operand list, and true. If there is an error or EOF, it returns
@ -891,7 +891,7 @@ func (p *Parser) symRefAttrs(name string, issueError bool) (bool, obj.ABI) {
// constrained form of the operand syntax that's always SB-based,
// non-static, and has at most a simple integer offset:
//
// [$|*]sym[<abi>][+Int](SB)
// [$|*]sym[<abi>][+Int](SB)
func (p *Parser) funcAddress() (string, obj.ABI, bool) {
switch p.peek() {
case '$', '*':
@ -1041,9 +1041,13 @@ func (p *Parser) registerIndirect(a *obj.Addr, prefix rune) {
//
// For 386/AMD64 register list specifies 4VNNIW-style multi-source operand.
// For range of 4 elements, Intel manual uses "+3" notation, for example:
//
// VP4DPWSSDS zmm1{k1}{z}, zmm2+3, m128
//
// Given asm line:
//
// VP4DPWSSDS Z5, [Z10-Z13], (AX)
//
// zmm2 is Z10, and Z13 is the only valid value for it (Z10+3).
// Only simple ranges are accepted, like [Z0-Z3].
//


@ -109,7 +109,7 @@ func (t *Tokenizer) Next() ScanToken {
}
text := s.TokenText()
t.line += strings.Count(text, "\n")
// TODO: Use constraint.IsGoBuild once it exists.
// TODO: Use constraint.IsGoBuild once #44505 fixed.
if strings.HasPrefix(text, "//go:build") {
t.tok = BuildComment
break


@ -6,6 +6,7 @@
Buildid displays or updates the build ID stored in a Go package or binary.
Usage:
go tool buildid [-w] file
By default, buildid prints the build ID found in the named file.


@ -3,10 +3,9 @@
// license that can be found in the LICENSE file.
/*
Cgo enables the creation of Go packages that call C code.
Using cgo with the go command
# Using cgo with the go command
To use cgo write normal Go code that imports a pseudo-package "C".
The Go code can then refer to types such as C.size_t, variables such
@ -91,11 +90,11 @@ file. This allows pre-compiled static libraries to be included in the package
directory and linked properly.
For example if package foo is in the directory /go/src/foo:
// #cgo LDFLAGS: -L${SRCDIR}/libs -lfoo
// #cgo LDFLAGS: -L${SRCDIR}/libs -lfoo
Will be expanded to:
// #cgo LDFLAGS: -L/go/src/foo/libs -lfoo
// #cgo LDFLAGS: -L/go/src/foo/libs -lfoo
When the Go tool sees that one or more Go files use the special import
"C", it will look for other non-Go files in the directory and compile
@ -139,7 +138,7 @@ or you can set the CC environment variable any time you run the go tool.
The CXX_FOR_TARGET, CXX_FOR_${GOOS}_${GOARCH}, and CXX
environment variables work in a similar way for C++ code.
Go references to C
# Go references to C
Within the Go file, C's struct field names that are keywords in Go
can be accessed by prefixing them with an underscore: if x points at a C
@ -291,7 +290,7 @@ the helper function crashes the program, like when Go itself runs out
of memory. Because C.malloc cannot fail, it has no two-result form
that returns errno.
C references to Go
# C references to Go
Go functions can be exported for use by C code in the following way:
@ -327,7 +326,7 @@ definitions and declarations, then the two output files will produce
duplicate symbols and the linker will fail. To avoid this, definitions
must be placed in preambles in other files, or in C source files.
Passing pointers
# Passing pointers
Go is a garbage collected language, and the garbage collector needs to
know the location of every pointer to Go memory. Because of this,
@ -398,7 +397,7 @@ passing uninitialized C memory to Go code if the Go code is going to
store pointer values in it. Zero out the memory in C before passing it
to Go.
Special cases
# Special cases
A few special C types which would normally be represented by a pointer
type in Go are instead represented by a uintptr. Those include:
@ -449,9 +448,10 @@ to auto-update code from Go 1.14 and earlier:
go tool fix -r eglconf <pkg>
Using cgo directly
# Using cgo directly
Usage:
go tool cgo [cgo options] [-- compiler options] gofiles...
Cgo transforms the specified input Go source files into several output


@ -114,11 +114,11 @@ func (p *Package) addToFlag(flag string, args []string) {
//
// For example, the following string:
//
// `a b:"c d" 'e''f' "g\""`
// `a b:"c d" 'e''f' "g\""`
//
// Would be parsed as:
//
// []string{"a", "b:c d", "ef", `g"`}
// []string{"a", "b:c d", "ef", `g"`}
func splitQuoted(s string) (r []string, err error) {
var args []string
arg := make([]rune, len(s))
@ -1137,13 +1137,19 @@ func (p *Package) mangle(f *File, arg *ast.Expr, addPosition bool) (ast.Expr, bo
// checkIndex checks whether arg has the form &a[i], possibly inside
// type conversions. If so, then in the general case it writes
// _cgoIndexNN := a
// _cgoNN := &cgoIndexNN[i] // with type conversions, if any
//
// _cgoIndexNN := a
// _cgoNN := &cgoIndexNN[i] // with type conversions, if any
//
// to sb, and writes
// _cgoCheckPointer(_cgoNN, _cgoIndexNN)
//
// _cgoCheckPointer(_cgoNN, _cgoIndexNN)
//
// to sbCheck, and returns true. If a is a simple variable or field reference,
// it writes
// _cgoIndexNN := &a
//
// _cgoIndexNN := &a
//
// and dereferences the uses of _cgoIndexNN. Taking the address avoids
// making a copy of an array.
//
@ -1191,10 +1197,14 @@ func (p *Package) checkIndex(sb, sbCheck *bytes.Buffer, arg ast.Expr, i int) boo
// checkAddr checks whether arg has the form &x, possibly inside type
// conversions. If so, it writes
// _cgoBaseNN := &x
// _cgoNN := _cgoBaseNN // with type conversions, if any
//
// _cgoBaseNN := &x
// _cgoNN := _cgoBaseNN // with type conversions, if any
//
// to sb, and writes
// _cgoCheckPointer(_cgoBaseNN, true)
//
// _cgoCheckPointer(_cgoBaseNN, true)
//
// to sbCheck, and returns true. This tells _cgoCheckPointer to check
// just the contents of the pointer being passed, not any other part
// of the memory allocation. This is run after checkIndex, which looks


@ -1399,6 +1399,19 @@ func (p *Package) cgoType(e ast.Expr) *Type {
case *ast.ChanType:
return &Type{Size: p.PtrSize, Align: p.PtrSize, C: c("GoChan")}
case *ast.Ident:
goTypesFixup := func(r *Type) *Type {
if r.Size == 0 { // int or uint
rr := new(Type)
*rr = *r
rr.Size = p.IntSize
rr.Align = p.IntSize
r = rr
}
if r.Align > p.PtrSize {
r.Align = p.PtrSize
}
return r
}
// Look up the type in the top level declarations.
// TODO: Handle types defined within a function.
for _, d := range p.Decl {
@ -1417,6 +1430,17 @@ func (p *Package) cgoType(e ast.Expr) *Type {
}
}
if def := typedef[t.Name]; def != nil {
if defgo, ok := def.Go.(*ast.Ident); ok {
switch defgo.Name {
case "complex64", "complex128":
// MSVC does not support the _Complex keyword
// nor the complex macro.
// Use GoComplex64 and GoComplex128 instead,
// which are typedef-ed to a compatible type.
// See go.dev/issues/36233.
return goTypesFixup(goTypes[defgo.Name])
}
}
return def
}
if t.Name == "uintptr" {
@ -1430,17 +1454,7 @@ func (p *Package) cgoType(e ast.Expr) *Type {
return &Type{Size: 2 * p.PtrSize, Align: p.PtrSize, C: c("GoInterface")}
}
if r, ok := goTypes[t.Name]; ok {
if r.Size == 0 { // int or uint
rr := new(Type)
*rr = *r
rr.Size = p.IntSize
rr.Align = p.IntSize
r = rr
}
if r.Align > p.PtrSize {
r.Align = p.PtrSize
}
return r
return goTypesFixup(r)
}
error_(e.Pos(), "unrecognized Go type %s", t.Name)
return &Type{Size: 4, Align: 4, C: c("int")}
@ -1895,8 +1909,14 @@ typedef GoUintGOINTBITS GoUint;
typedef size_t GoUintptr;
typedef float GoFloat32;
typedef double GoFloat64;
#ifdef _MSC_VER
#include <complex.h>
typedef _Fcomplex GoComplex64;
typedef _Dcomplex GoComplex128;
#else
typedef float _Complex GoComplex64;
typedef double _Complex GoComplex128;
#endif
/*
static assertion to make sure the file is being used on architecture


@ -258,7 +258,7 @@ type RegAmounts struct {
// by the ABI rules for parameter passing and result returning.
type ABIConfig struct {
// Do we need anything more than this?
offsetForLocals int64 // e.g., obj.(*Link).FixedFrameSize() -- extra linkage information on some architectures.
offsetForLocals int64 // e.g., obj.(*Link).Arch.FixedFrameSize -- extra linkage information on some architectures.
regAmounts RegAmounts
regsForTypeCache map[*types.Type]int
}


@ -111,7 +111,9 @@ func moveByType(t *types.Type) obj.As {
}
// opregreg emits instructions for
// dest := dest(To) op src(From)
//
// dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
@ -280,8 +282,15 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = v.Reg()
p.SetFrom3Reg(v.Args[1].Reg())
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.SetFrom3Reg(v.Args[0].Reg())
case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload:
ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
ssagen.AddAux(&m, v)
@ -289,8 +298,10 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8:
ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
m := obj.Addr{Type: obj.TYPE_MEM}
memIdx(&m, v)
@ -786,7 +797,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
p := s.Prog(v.Op.Asm())
memIdx(&p.From, v)
ssagen.AddAux(&p.From, v)
@ -808,7 +820,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8:
ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
@ -1087,7 +1100,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
}
p := s.Prog(mov)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
@ -1387,6 +1400,16 @@ func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
}
}
case ssa.BlockAMD64JUMPTABLE:
// JMP *(TABLE)(INDEX*8)
p := s.Prog(obj.AJMP)
p.To.Type = obj.TYPE_MEM
p.To.Reg = b.Controls[1].Reg()
p.To.Index = b.Controls[0].Reg()
p.To.Scale = 8
// Save jump tables for later resolution of the target blocks.
s.JumpTables = append(s.JumpTables, b)
default:
b.Fatalf("branch not implemented: %s", b.LongString())
}


@ -241,6 +241,7 @@ var featureToOpcodes = map[string][]string{
// native objdump doesn't include [QL] on linux.
"popcnt": {"popcntq", "popcntl", "popcnt"},
"bmi1": {"andnq", "andnl", "andn", "blsiq", "blsil", "blsi", "blsmskq", "blsmskl", "blsmsk", "blsrq", "blsrl", "blsr", "tzcntq", "tzcntl", "tzcnt"},
"bmi2": {"sarxq", "sarxl", "sarx", "shlxq", "shlxl", "shlx", "shrxq", "shrxl", "shrx"},
"sse41": {"roundsd"},
"fma": {"vfmadd231sd"},
"movbe": {"movbeqq", "movbeq", "movbell", "movbel", "movbe"},


@ -854,7 +854,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()


@ -171,7 +171,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
for _, a := range v.Block.Func.RegArgs {
// Pass the spill/unspill information along to the assembler, offset by size of
// the saved LR slot.
addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.FixedFrameSize())
addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
s.FuncInfo().AddSpill(
obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
}
@ -1128,7 +1128,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()


@ -6,6 +6,7 @@ package deadcode
import (
"go/constant"
"go/token"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
@ -86,6 +87,85 @@ func stmts(nn *ir.Nodes) {
}
}
}
if n.Op() == ir.OSWITCH {
n := n.(*ir.SwitchStmt)
// Use a closure wrapper here so we can use "return" to abort the analysis.
func() {
if n.Tag != nil && n.Tag.Op() == ir.OTYPESW {
return // no special type-switch case yet.
}
var x constant.Value // value we're switching on
if n.Tag != nil {
if ir.ConstType(n.Tag) == constant.Unknown {
return
}
x = n.Tag.Val()
} else {
x = constant.MakeBool(true) // switch { ... } => switch true { ... }
}
var def *ir.CaseClause
for _, cas := range n.Cases {
if len(cas.List) == 0 { // default case
def = cas
continue
}
for _, c := range cas.List {
if ir.ConstType(c) == constant.Unknown {
return // can't statically tell if it matches or not - give up.
}
if constant.Compare(x, token.EQL, c.Val()) {
for _, n := range cas.Body {
if n.Op() == ir.OFALL {
return // fallthrough makes it complicated - abort.
}
}
// This switch entry is the one that always triggers.
for _, cas2 := range n.Cases {
for _, c2 := range cas2.List {
if cas2 != cas || c2 != c {
ir.Visit(c2, markHiddenClosureDead)
}
}
if cas2 != cas {
ir.VisitList(cas2.Body, markHiddenClosureDead)
}
}
cas.List[0] = c
cas.List = cas.List[:1]
n.Cases[0] = cas
n.Cases = n.Cases[:1]
return
}
}
}
if def != nil {
for _, n := range def.Body {
if n.Op() == ir.OFALL {
return // fallthrough makes it complicated - abort.
}
}
for _, cas := range n.Cases {
if cas != def {
ir.VisitList(cas.List, markHiddenClosureDead)
ir.VisitList(cas.Body, markHiddenClosureDead)
}
}
n.Cases[0] = def
n.Cases = n.Cases[:1]
return
}
// TODO: handle case bodies ending with panic/return as we do in the IF case above.
// entire switch is a nop - no case ever triggers
for _, cas := range n.Cases {
ir.VisitList(cas.List, markHiddenClosureDead)
ir.VisitList(cas.Body, markHiddenClosureDead)
}
n.Cases = n.Cases[:0]
}()
}
if len(n.Init()) != 0 {
stmts(n.(ir.InitNode).PtrInit())
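For context (not part of the commit), the kind of source pattern the new OSWITCH handling folds; with a constant tag only the matching case survives, and closures in the dead cases are marked dead:

package main

import "fmt"

const mode = 2

func main() {
	switch mode {
	case 1:
		fmt.Println(func() string { return "one" }()) // dead; hidden closure eliminated
	case 2:
		fmt.Println("two") // the only case kept
	default:
		fmt.Println("other") // dead
	}
}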


@ -339,7 +339,7 @@ func createSimpleVar(fnsym *obj.LSym, n *ir.Name) *dwarf.Var {
localAutoOffset := func() int64 {
offs = n.FrameOffset()
if base.Ctxt.FixedFrameSize() == 0 {
if base.Ctxt.Arch.FixedFrameSize == 0 {
offs -= int64(types.PtrSize)
}
if buildcfg.FramePointerEnabled {
@ -357,7 +357,7 @@ func createSimpleVar(fnsym *obj.LSym, n *ir.Name) *dwarf.Var {
if n.IsOutputParamInRegisters() {
offs = localAutoOffset()
} else {
offs = n.FrameOffset() + base.Ctxt.FixedFrameSize()
offs = n.FrameOffset() + base.Ctxt.Arch.FixedFrameSize
}
default:
@ -547,7 +547,7 @@ func RecordFlags(flags ...string) {
fmt.Fprintf(&cmd, " -%s=%v", f.Name, getter.Get())
}
// Adds flag to producer string singalling whether regabi is turned on or
// Adds flag to producer string signaling whether regabi is turned on or
// off.
// Once regabi is turned on across the board and the relative GOEXPERIMENT
// knobs no longer exist this code should be removed.


@ -24,9 +24,9 @@ func fixRecoverCall(call *ir.CallExpr) {
pos := call.Pos()
// FP is equal to caller's SP plus FixedFrameSize().
// FP is equal to caller's SP plus FixedFrameSize.
var fp ir.Node = ir.NewCallExpr(pos, ir.OGETCALLERSP, nil, nil)
if off := base.Ctxt.FixedFrameSize(); off != 0 {
if off := base.Ctxt.Arch.FixedFrameSize; off != 0 {
fp = ir.NewBinaryExpr(fp.Pos(), ir.OADD, fp, ir.NewInt(off))
}
// TODO(mdempsky): Replace *int32 with unsafe.Pointer, without upsetting checkptr.


@ -210,6 +210,9 @@ func (b *batch) walkFunc(fn *ir.Func) {
switch n.Op() {
case ir.OLABEL:
n := n.(*ir.LabelStmt)
if n.Label.IsBlank() {
break
}
if e.labels == nil {
e.labels = make(map[*types.Sym]labelState)
}


@ -50,6 +50,9 @@ func (e *escape) stmt(n ir.Node) {
case ir.OLABEL:
n := n.(*ir.LabelStmt)
if n.Label.IsBlank() {
break
}
switch e.labels[n.Label] {
case nonlooping:
if base.Flag.LowerM > 2 {


@ -271,6 +271,9 @@ func addGCLocals() {
objw.Global(x, int32(len(x.P)), obj.RODATA|obj.DUPOK)
x.Set(obj.AttrStatic, true)
}
for _, jt := range fn.JumpTables {
objw.Global(jt.Sym, int32(len(jt.Targets)*base.Ctxt.Arch.PtrSize), obj.RODATA)
}
}
}


@ -522,7 +522,8 @@ func InlineCalls(fn *ir.Func) {
// but then you may as well do it here. so this is cleaner and
// shorter and less complicated.
// The result of inlnode MUST be assigned back to n, e.g.
// n.Left = inlnode(n.Left)
//
// n.Left = inlnode(n.Left)
func inlnode(n ir.Node, maxCost int32, inlMap map[*ir.Func]bool, edit func(ir.Node) ir.Node) ir.Node {
if n == nil {
return n
@ -657,7 +658,8 @@ var NewInline = func(call *ir.CallExpr, fn *ir.Func, inlIndex int) *ir.InlinedCa
// inlined function body, and (List, Rlist) contain the (input, output)
// parameters.
// The result of mkinlcall MUST be assigned back to n, e.g.
// n.Left = mkinlcall(n.Left, fn, isddd)
//
// n.Left = mkinlcall(n.Left, fn, isddd)
func mkinlcall(n *ir.CallExpr, fn *ir.Func, maxCost int32, inlMap map[*ir.Func]bool, edit func(ir.Node) ir.Node) ir.Node {
if fn.Inl == nil {
if logopt.Enabled() {


@ -26,7 +26,7 @@ func NewString(s string) Node {
}
const (
// Maximum size in bits for big.Ints before signalling
// Maximum size in bits for big.Ints before signaling
// overflow and also mantissa precision for big.Floats.
ConstPrec = 512
)


@ -951,11 +951,11 @@ var IsIntrinsicCall = func(*CallExpr) bool { return false }
// instead of computing both. SameSafeExpr assumes that l and r are
// used in the same statement or expression. In order for it to be
// safe to reuse l or r, they must:
// * be the same expression
// * not have side-effects (no function calls, no channel ops);
// however, panics are ok
// * not cause inappropriate aliasing; e.g. two string to []byte
// conversions, must result in two distinct slices
// - be the same expression
// - not have side-effects (no function calls, no channel ops);
// however, panics are ok
// - not cause inappropriate aliasing; e.g. two string to []byte
// conversions, must result in two distinct slices
//
// The handling of OINDEXMAP is subtle. OINDEXMAP can occur both
// as an lvalue (map assignment) and an rvalue (map access). This is


@ -310,6 +310,7 @@ const (
ORESULT // result of a function call; Xoffset is stack offset
OINLMARK // start of an inlined body, with file/line of caller. Xoffset is an index into the inline tree.
OLINKSYMOFFSET // offset within a name
OJUMPTABLE // A jump table structure for implementing dense expression switches
// opcodes for generics
ODYNAMICDOTTYPE // x = i.(T) where T is a type parameter (or derived from a type parameter)
@ -551,7 +552,8 @@ func SetPos(n Node) src.XPos {
}
// The result of InitExpr MUST be assigned back to n, e.g.
// n.X = InitExpr(init, n.X)
//
// n.X = InitExpr(init, n.X)
func InitExpr(init []Node, expr Node) Node {
if len(init) == 0 {
return expr


@ -712,6 +712,28 @@ func (n *InstExpr) editChildren(edit func(Node) Node) {
editNodes(n.Targs, edit)
}
func (n *JumpTableStmt) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *JumpTableStmt) copy() Node {
c := *n
c.init = copyNodes(c.init)
return &c
}
func (n *JumpTableStmt) doChildren(do func(Node) bool) bool {
if doNodes(n.init, do) {
return true
}
if n.Idx != nil && do(n.Idx) {
return true
}
return false
}
func (n *JumpTableStmt) editChildren(edit func(Node) Node) {
editNodes(n.init, edit)
if n.Idx != nil {
n.Idx = edit(n.Idx).(Node)
}
}
func (n *KeyExpr) Format(s fmt.State, verb rune) { fmtNode(n, s, verb) }
func (n *KeyExpr) copy() Node {
c := *n


@ -154,19 +154,20 @@ func _() {
_ = x[ORESULT-143]
_ = x[OINLMARK-144]
_ = x[OLINKSYMOFFSET-145]
_ = x[ODYNAMICDOTTYPE-146]
_ = x[ODYNAMICDOTTYPE2-147]
_ = x[ODYNAMICTYPE-148]
_ = x[OTAILCALL-149]
_ = x[OGETG-150]
_ = x[OGETCALLERPC-151]
_ = x[OGETCALLERSP-152]
_ = x[OEND-153]
_ = x[OJUMPTABLE-146]
_ = x[ODYNAMICDOTTYPE-147]
_ = x[ODYNAMICDOTTYPE2-148]
_ = x[ODYNAMICTYPE-149]
_ = x[OTAILCALL-150]
_ = x[OGETG-151]
_ = x[OGETCALLERPC-152]
_ = x[OGETCALLERSP-153]
_ = x[OEND-154]
}
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
const _Op_name = "XXXNAMENONAMETYPELITERALNILADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESSLICE2ARRPTRASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVIDATACONVNOPCOPYDCLDCLFUNCDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMAKESLICECOPYMULDIVMODLSHRSHANDANDNOTNEWNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECOVERFPRECVRUNESTRSELRECV2REALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFUNSAFEADDUNSAFESLICEMETHEXPRMETHVALUEBLOCKBREAKCASECONTINUEDEFERFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWFUNCINSTTFUNCINLCALLEFACEITABIDATASPTRCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKLINKSYMOFFSETJUMPTABLEDYNAMICDOTTYPEDYNAMICDOTTYPE2DYNAMICTYPETAILCALLGETGGETCALLERPCGETCALLERSPEND"
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 875, 890, 901, 909, 913, 924, 935, 938}
var _Op_index = [...]uint16{0, 3, 7, 13, 17, 24, 27, 30, 33, 35, 38, 44, 48, 54, 60, 69, 81, 90, 99, 111, 120, 132, 134, 137, 147, 154, 161, 168, 172, 176, 184, 192, 201, 204, 209, 216, 223, 229, 238, 246, 254, 260, 264, 273, 282, 289, 293, 296, 303, 311, 318, 324, 327, 333, 340, 348, 352, 359, 367, 369, 371, 373, 375, 377, 379, 384, 389, 397, 400, 409, 412, 416, 424, 431, 440, 453, 456, 459, 462, 465, 468, 471, 477, 480, 483, 489, 493, 496, 500, 505, 510, 516, 521, 525, 530, 538, 546, 552, 561, 572, 579, 588, 592, 599, 607, 611, 615, 622, 629, 637, 643, 652, 663, 671, 680, 685, 690, 694, 702, 707, 711, 714, 722, 726, 728, 733, 735, 740, 746, 752, 758, 764, 772, 777, 784, 789, 793, 798, 802, 807, 815, 821, 828, 835, 841, 848, 861, 870, 884, 899, 910, 918, 922, 933, 944, 947}
func (i Op) String() string {
if i >= Op(len(_Op_index)-1) {


@ -8,6 +8,7 @@ import (
"cmd/compile/internal/base"
"cmd/compile/internal/types"
"cmd/internal/src"
"go/constant"
)
// A Decl is a declaration of a const, type, or var. (A declared func is a Func.)
@ -262,6 +263,37 @@ func NewIfStmt(pos src.XPos, cond Node, body, els []Node) *IfStmt {
return n
}
// A JumpTableStmt is used to implement switches. Its semantics are:
// tmp := jt.Idx
// if tmp == Cases[0] goto Targets[0]
// if tmp == Cases[1] goto Targets[1]
// ...
// if tmp == Cases[n] goto Targets[n]
// Note that a JumpTableStmt is more like a multiway-goto than
// a multiway-if. In particular, the case bodies are just
// labels to jump to, not full Nodes lists.
type JumpTableStmt struct {
miniStmt
// Value used to index the jump table.
// We support only integer types that
// are at most the size of a uintptr.
Idx Node
// If Idx is equal to Cases[i], jump to Targets[i].
// Cases entries must be distinct and in increasing order.
// The length of Cases and Targets must be equal.
Cases []constant.Value
Targets []*types.Sym
}
func NewJumpTableStmt(pos src.XPos, idx Node) *JumpTableStmt {
n := &JumpTableStmt{Idx: idx}
n.pos = pos
n.op = OJUMPTABLE
return n
}
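For context (not part of the commit), a dense switch over small integer constants is the kind of code this lowering targets; whether a table is actually used is a backend decision:

package main

import "fmt"

func describe(x int) string {
	switch x { // candidate for a jump table on amd64
	case 0:
		return "zero"
	case 1:
		return "one"
	case 2:
		return "two"
	case 3:
		return "three"
	case 4:
		return "four"
	case 5:
		return "five"
	case 6:
		return "six"
	case 7:
		return "seven"
	}
	return "big"
}

func main() { fmt.Println(describe(3)) }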
// An InlineMarkStmt is a marker placed just before an inlined body.
type InlineMarkStmt struct {
miniStmt


@ -244,8 +244,10 @@ func (lv *liveness) initcache() {
// liveness effects on a variable.
//
// The possible flags are:
//
// uevar - used by the instruction
// varkill - killed by the instruction (set)
//
// A kill happens after the use (for an instruction that updates a value, for example).
type liveEffect int
@ -1460,14 +1462,14 @@ func (lv *liveness) emitStackObjects() *obj.LSym {
// isfat reports whether a variable of type t needs multiple assignments to initialize.
// For example:
//
// type T struct { x, y int }
// x := T{x: 0, y: 1}
// type T struct { x, y int }
// x := T{x: 0, y: 1}
//
// Then we need:
//
// var t T
// t.x = 0
// t.y = 1
// var t T
// t.x = 0
// t.y = 1
//
// to fully initialize t.
func isfat(t *types.Type) bool {


@ -20,7 +20,7 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
}
if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) {
p = pp.Append(p, mips.AMOVW, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, base.Ctxt.FixedFrameSize()+off+i)
p = pp.Append(p, mips.AMOVW, obj.TYPE_REG, mips.REGZERO, 0, obj.TYPE_MEM, mips.REGSP, base.Ctxt.Arch.FixedFrameSize+off+i)
}
} else {
//fmt.Printf("zerorange frame:%v, lo: %v, hi:%v \n", frame ,lo, hi)
@ -30,7 +30,7 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
// MOVW R0, (Widthptr)r1
// ADD $Widthptr, r1
// BNE r1, r2, loop
p = pp.Append(p, mips.AADD, obj.TYPE_CONST, 0, base.Ctxt.FixedFrameSize()+off-4, obj.TYPE_REG, mips.REGRT1, 0)
p = pp.Append(p, mips.AADD, obj.TYPE_CONST, 0, base.Ctxt.Arch.FixedFrameSize+off-4, obj.TYPE_REG, mips.REGRT1, 0)
p.Reg = mips.REGSP
p = pp.Append(p, mips.AADD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, mips.REGRT2, 0)
p.Reg = mips.REGRT1


@ -792,7 +792,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(mips.AMOVW)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()


@ -762,7 +762,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(mips.AMOVV)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()


@ -6,6 +6,7 @@ package noder
import (
"fmt"
"go/constant"
"cmd/compile/internal/base"
"cmd/compile/internal/ir"
@ -62,6 +63,14 @@ func (g *irgen) expr(expr syntax.Expr) ir.Node {
case types2.UntypedNil:
// ok; can appear in type switch case clauses
// TODO(mdempsky): Handle as part of type switches instead?
case types2.UntypedInt, types2.UntypedFloat, types2.UntypedComplex:
// Untyped rhs of non-constant shift, e.g. x << 1.0.
// If we have a constant value, it must be an int >= 0.
if tv.Value != nil {
s := constant.ToInt(tv.Value)
assert(s.Kind() == constant.Int && constant.Sign(s) >= 0)
}
typ = types2.Typ[types2.Uint]
case types2.UntypedBool:
typ = types2.Typ[types2.Bool] // expression in "if" or "for" condition
case types2.UntypedString:


@ -33,30 +33,36 @@ func LoadPackage(filenames []string) {
sem := make(chan struct{}, runtime.GOMAXPROCS(0)+10)
noders := make([]*noder, len(filenames))
for i, filename := range filenames {
for i := range noders {
p := noder{
err: make(chan syntax.Error),
}
noders[i] = &p
filename := filename
go func() {
sem <- struct{}{}
defer func() { <-sem }()
defer close(p.err)
fbase := syntax.NewFileBase(filename)
f, err := os.Open(filename)
if err != nil {
p.error(syntax.Error{Msg: err.Error()})
return
}
defer f.Close()
p.file, _ = syntax.Parse(fbase, f, p.error, p.pragma, mode) // errors are tracked via p.error
}()
}
// Move the entire syntax processing logic into a separate goroutine to avoid blocking on the "sem".
go func() {
for i, filename := range filenames {
filename := filename
p := noders[i]
sem <- struct{}{}
go func() {
defer func() { <-sem }()
defer close(p.err)
fbase := syntax.NewFileBase(filename)
f, err := os.Open(filename)
if err != nil {
p.error(syntax.Error{Msg: err.Error()})
return
}
defer f.Close()
p.file, _ = syntax.Parse(fbase, f, p.error, p.pragma, mode) // errors are tracked via p.error
}()
}
}()
var lines uint
for _, p := range noders {
for e := range p.err {


@ -741,6 +741,7 @@ func (g *genInst) genericSubst(newsym *types.Sym, nameNode *ir.Name, tparams []*
// Pos of the instantiated function is same as the generic function
newf := ir.NewFunc(gf.Pos())
newf.Pragma = gf.Pragma // copy over pragmas from generic function to stenciled implementation.
newf.Endlineno = gf.Endlineno
newf.Nname = ir.NewNameAt(gf.Pos(), newsym)
newf.Nname.Func = newf
newf.Nname.Defn = newf


@ -33,38 +33,38 @@ var localPkgReader *pkgReader
//
// The pipeline contains 2 steps:
//
// 1) Generate package export data "stub".
// 1. Generate package export data "stub".
//
// 2) Generate package IR from package export data.
// 2. Generate package IR from package export data.
//
// The package data "stub" at step (1) contains everything from the local package,
// but nothing that has been imported. When we're actually writing out export data
// to the output files (see writeNewExport function), we run the "linker", which does
// a few things:
//
// + Updates compiler extensions data (e.g., inlining cost, escape analysis results).
// - Updates compiler extensions data (e.g., inlining cost, escape analysis results).
//
// + Handles re-exporting any transitive dependencies.
// - Handles re-exporting any transitive dependencies.
//
// + Prunes out any unnecessary details (e.g., non-inlineable functions, because any
// downstream importers only care about inlinable functions).
// - Prunes out any unnecessary details (e.g., non-inlineable functions, because any
// downstream importers only care about inlinable functions).
//
// The source files are typechecked twice, once before writing export data
// using types2 checker, once after read export data using gc/typecheck.
// This duplication of work will go away once we always use types2 checker,
// we can remove the gc/typecheck pass. The reason it is still here:
//
// + It reduces engineering costs in maintaining a fork of typecheck
// (e.g., no need to backport fixes like CL 327651).
// - It reduces engineering costs in maintaining a fork of typecheck
// (e.g., no need to backport fixes like CL 327651).
//
// + It makes it easier to pass toolstash -cmp.
// - It makes it easier to pass toolstash -cmp.
//
// + Historically, we would always re-run the typechecker after import, even though
// we know the imported data is valid. It's not ideal, but also not causing any
// problem either.
// - Historically, we would always re-run the typechecker after import, even though
// we know the imported data is valid. It's not ideal, but also not causing any
// problem either.
//
// + There's still transformation that being done during gc/typecheck, like rewriting
// multi-valued function call, or transform ir.OINDEX -> ir.OINDEXMAP.
// - There's still transformation that being done during gc/typecheck, like rewriting
// multi-valued function call, or transform ir.OINDEX -> ir.OINDEXMAP.
//
// Using syntax+types2 tree, which already has a complete representation of generics,
// the unified IR has the full typed AST for doing introspection during step (1).


@ -65,9 +65,9 @@ func MakeInit() {
// Task makes and returns an initialization record for the package.
// See runtime/proc.go:initTask for its layout.
// The 3 tasks for initialization are:
// 1) Initialize all of the packages the current package depends on.
// 2) Initialize all the variables that have initializers.
// 3) Run any init functions.
// 1. Initialize all of the packages the current package depends on.
// 2. Initialize all the variables that have initializers.
// 3. Run any init functions.
func Task() *ir.Name {
var deps []*obj.LSym // initTask records for packages the current package depends on
var fns []*obj.LSym // functions to call for package initialization


@ -19,17 +19,17 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
}
if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) {
p = pp.Append(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, base.Ctxt.FixedFrameSize()+off+i)
p = pp.Append(p, ppc64.AMOVD, obj.TYPE_REG, ppc64.REGZERO, 0, obj.TYPE_MEM, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize+off+i)
}
} else if cnt <= int64(128*types.PtrSize) {
p = pp.Append(p, ppc64.AADD, obj.TYPE_CONST, 0, base.Ctxt.FixedFrameSize()+off-8, obj.TYPE_REG, ppc64.REGRT1, 0)
p = pp.Append(p, ppc64.AADD, obj.TYPE_CONST, 0, base.Ctxt.Arch.FixedFrameSize+off-8, obj.TYPE_REG, ppc64.REGRT1, 0)
p.Reg = ppc64.REGSP
p = pp.Append(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ir.Syms.Duffzero
p.To.Offset = 4 * (128 - cnt/int64(types.PtrSize))
} else {
p = pp.Append(p, ppc64.AMOVD, obj.TYPE_CONST, 0, base.Ctxt.FixedFrameSize()+off-8, obj.TYPE_REG, ppc64.REGTMP, 0)
p = pp.Append(p, ppc64.AMOVD, obj.TYPE_CONST, 0, base.Ctxt.Arch.FixedFrameSize+off-8, obj.TYPE_REG, ppc64.REGTMP, 0)
p = pp.Append(p, ppc64.AADD, obj.TYPE_REG, ppc64.REGTMP, 0, obj.TYPE_REG, ppc64.REGRT1, 0)
p.Reg = ppc64.REGSP
p = pp.Append(p, ppc64.AMOVD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, ppc64.REGTMP, 0)

View file

@@ -476,7 +476,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
@@ -509,7 +509,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
for _, a := range v.Block.Func.RegArgs {
// Pass the spill/unspill information along to the assembler, offset by size of
// the saved LR slot.
addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.FixedFrameSize())
addr := ssagen.SpillSlotAddr(a, ppc64.REGSP, base.Ctxt.Arch.FixedFrameSize)
s.FuncInfo().AddSpill(
obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
}

View file

@@ -412,22 +412,25 @@ func geneq(t *types.Type) *obj.LSym {
//
// if eq(p[0], q[0]) && eq(p[1], q[1]) && ... {
// } else {
// return
// goto neq
// }
//
// And so on.
//
// Otherwise it generates:
//
// for i := 0; i < nelem; i++ {
// if eq(p[i], q[i]) {
// iterateTo := nelem/unroll*unroll
// for i := 0; i < iterateTo; i += unroll {
// if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) {
// } else {
// goto neq
// }
// }
// if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... {
// } else {
// goto neq
// }
//
// TODO(josharian): consider doing some loop unrolling
// for larger nelem as well, processing a few elements at a time in a loop.
checkAll := func(unroll int64, last bool, eq func(pi, qi ir.Node) ir.Node) {
// checkIdx generates a node to check for equality at index i.
checkIdx := func(i ir.Node) ir.Node {
@@ -442,38 +445,62 @@ func geneq(t *types.Type) *obj.LSym {
return eq(pi, qi)
}
if nelem <= unroll {
if last {
// Do last comparison in a different manner.
nelem--
}
// Generate a series of checks.
for i := int64(0); i < nelem; i++ {
// if check {} else { goto neq }
nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(i)), nil, nil)
nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
fn.Body.Append(nif)
}
if last {
fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem))))
}
} else {
// Generate a for loop.
// for i := 0; i < nelem; i++
iterations := nelem / unroll
iterateTo := iterations * unroll
// If a loop is iterated only once, there shouldn't be any loop at all.
if iterations == 1 {
iterateTo = 0
}
if iterateTo > 0 {
// Generate an unrolled for loop.
// for i := 0; i < nelem/unroll*unroll; i += unroll
i := typecheck.Temp(types.Types[types.TINT])
init := ir.NewAssignStmt(base.Pos, i, ir.NewInt(0))
cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(nelem))
post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1)))
loop := ir.NewForStmt(base.Pos, nil, cond, post, nil)
cond := ir.NewBinaryExpr(base.Pos, ir.OLT, i, ir.NewInt(iterateTo))
loop := ir.NewForStmt(base.Pos, nil, cond, nil, nil)
loop.PtrInit().Append(init)
// if eq(pi, qi) {} else { goto neq }
nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil)
nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
loop.Body.Append(nif)
fn.Body.Append(loop)
if last {
fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true)))
// if eq(p[i+0], q[i+0]) && eq(p[i+1], q[i+1]) && ... && eq(p[i+unroll-1], q[i+unroll-1]) {
// } else {
// goto neq
// }
for j := int64(0); j < unroll; j++ {
// if check {} else { goto neq }
nif := ir.NewIfStmt(base.Pos, checkIdx(i), nil, nil)
nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
loop.Body.Append(nif)
post := ir.NewAssignStmt(base.Pos, i, ir.NewBinaryExpr(base.Pos, ir.OADD, i, ir.NewInt(1)))
loop.Body.Append(post)
}
fn.Body.Append(loop)
if nelem == iterateTo {
if last {
fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, ir.NewBool(true)))
}
return
}
}
// Generate remaining checks, if nelem is not a multiple of unroll.
if last {
// Do last comparison in a different manner.
nelem--
}
// if eq(p[iterateTo+0], q[iterateTo+0]) && eq(p[iterateTo+1], q[iterateTo+1]) && ... {
// } else {
// goto neq
// }
for j := iterateTo; j < nelem; j++ {
// if check {} else { goto neq }
nif := ir.NewIfStmt(base.Pos, checkIdx(ir.NewInt(j)), nil, nil)
nif.Else.Append(ir.NewBranchStmt(base.Pos, ir.OGOTO, neq))
fn.Body.Append(nif)
}
if last {
fn.Body.Append(ir.NewAssignStmt(base.Pos, nr, checkIdx(ir.NewInt(nelem))))
}
}
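To make the new unrolled shape concrete: for nelem = 7 and unroll = 3, iterations = 2, so iterateTo = 6 and the generated comparison is roughly equivalent to the following (a hand-written sketch of the emitted IR, not compiler output):

	// sketch: equality for a 7-element array, unroll = 3
	for i := 0; i < 6; {
		if !eq(p[i], q[i]) { goto neq }
		i++
		if !eq(p[i], q[i]) { goto neq }
		i++
		if !eq(p[i], q[i]) { goto neq }
		i++
	}
	if !eq(p[6], q[6]) { goto neq } // tail check: 7 is not a multiple of 3

When last is true, the final check is instead assigned to the result (nr = eq(p[6], q[6])), matching the nelem-- handling above. Note also the single-iteration special case: if the loop would run exactly once (e.g. nelem = 3, unroll = 3), iterateTo is forced to 0 and all checks are emitted straight-line.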
@@ -481,7 +508,6 @@ func geneq(t *types.Type) *obj.LSym {
case types.TSTRING:
// Do two loops. First, check that all the lengths match (cheap).
// Second, check that all the contents match (expensive).
// TODO: when the array size is small, unroll the length match checks.
checkAll(3, false, func(pi, qi ir.Node) ir.Node {
// Compare lengths.
eqlen, _ := EqString(pi, qi)
@@ -655,7 +681,8 @@ func anyCall(fn *ir.Func) bool {
}
// eqfield returns the node
// p.field == q.field
//
// p.field == q.field
func eqfield(p ir.Node, q ir.Node, field *types.Sym) ir.Node {
nx := ir.NewSelectorExpr(base.Pos, ir.OXDOT, p, field)
ny := ir.NewSelectorExpr(base.Pos, ir.OXDOT, q, field)
@@ -664,9 +691,13 @@ func eqfield(p ir.Node, q ir.Node, field *types.Sym) ir.Node {
}
// EqString returns the nodes
// len(s) == len(t)
//
// len(s) == len(t)
//
// and
// memequal(s.ptr, t.ptr, len(s))
//
// memequal(s.ptr, t.ptr, len(s))
//
// which can be used to construct string equality comparison.
// eqlen must be evaluated before eqmem, and shortcircuiting is required.
func EqString(s, t ir.Node) (eqlen *ir.BinaryExpr, eqmem *ir.CallExpr) {
@@ -688,9 +719,13 @@ func EqString(s, t ir.Node) (eqlen *ir.BinaryExpr, eqmem *ir.CallExpr) {
}
// EqInterface returns the nodes
// s.tab == t.tab (or s.typ == t.typ, as appropriate)
//
// s.tab == t.tab (or s.typ == t.typ, as appropriate)
//
// and
// ifaceeq(s.tab, s.data, t.data) (or efaceeq(s.typ, s.data, t.data), as appropriate)
//
// ifaceeq(s.tab, s.data, t.data) (or efaceeq(s.typ, s.data, t.data), as appropriate)
//
// which can be used to construct interface equality comparison.
// eqtab must be evaluated before eqdata, and shortcircuiting is required.
func EqInterface(s, t ir.Node) (eqtab *ir.BinaryExpr, eqdata *ir.CallExpr) {
@@ -724,7 +759,8 @@ func EqInterface(s, t ir.Node) (eqtab *ir.BinaryExpr, eqdata *ir.CallExpr) {
}
// eqmem returns the node
// memequal(&p.field, &q.field [, size])
//
// memequal(&p.field, &q.field [, size])
func eqmem(p ir.Node, q ir.Node, field *types.Sym, size int64) ir.Node {
nx := typecheck.Expr(typecheck.NodAddr(ir.NewSelectorExpr(base.Pos, ir.OXDOT, p, field)))
ny := typecheck.Expr(typecheck.NodAddr(ir.NewSelectorExpr(base.Pos, ir.OXDOT, q, field)))

View file

@@ -0,0 +1,76 @@
// Copyright 2021 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reflectdata_test
import "testing"
func BenchmarkEqArrayOfStrings5(b *testing.B) {
var a [5]string
var c [5]string
for i := 0; i < 5; i++ {
a[i] = "aaaa"
c[i] = "cccc"
}
for j := 0; j < b.N; j++ {
_ = a == c
}
}
func BenchmarkEqArrayOfStrings64(b *testing.B) {
var a [64]string
var c [64]string
for i := 0; i < 64; i++ {
a[i] = "aaaa"
c[i] = "cccc"
}
for j := 0; j < b.N; j++ {
_ = a == c
}
}
func BenchmarkEqArrayOfStrings1024(b *testing.B) {
var a [1024]string
var c [1024]string
for i := 0; i < 1024; i++ {
a[i] = "aaaa"
c[i] = "cccc"
}
for j := 0; j < b.N; j++ {
_ = a == c
}
}
func BenchmarkEqArrayOfFloats5(b *testing.B) {
var a [5]float32
var c [5]float32
for i := 0; i < b.N; i++ {
_ = a == c
}
}
func BenchmarkEqArrayOfFloats64(b *testing.B) {
var a [64]float32
var c [64]float32
for i := 0; i < b.N; i++ {
_ = a == c
}
}
func BenchmarkEqArrayOfFloats1024(b *testing.B) {
var a [1024]float32
var c [1024]float32
for i := 0; i < b.N; i++ {
_ = a == c
}
}
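These are ordinary testing benchmarks; from the src directory of a Go checkout they can be run with an invocation along the lines of:

	go test -run=NONE -bench=BenchmarkEqArrayOf cmd/compile/internal/reflectdata

which exercises the unrolled array-equality code generated by the geneq change above.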

View file

@@ -667,10 +667,10 @@ var kinds = []int{
// tflag is documented in reflect/type.go.
//
// tflag values must be kept in sync with copies in:
// - cmd/compile/internal/reflectdata/reflect.go
// - cmd/link/internal/ld/decodesym.go
// - reflect/type.go
// - runtime/type.go
// - cmd/compile/internal/reflectdata/reflect.go
// - cmd/link/internal/ld/decodesym.go
// - reflect/type.go
// - runtime/type.go
const (
tflagUncommon = 1 << 0
tflagExtraStar = 1 << 1
@@ -1355,21 +1355,21 @@ func writeITab(lsym *obj.LSym, typ, iface *types.Type, allowNonImplement bool) {
// type itab struct {
// inter *interfacetype
// _type *_type
// hash uint32
// hash uint32 // copy of _type.hash. Used for type switches.
// _ [4]byte
// fun [1]uintptr // variable sized
// fun [1]uintptr // variable sized. fun[0]==0 means _type does not implement inter.
// }
o := objw.SymPtr(lsym, 0, writeType(iface), 0)
o = objw.SymPtr(lsym, o, writeType(typ), 0)
o = objw.Uint32(lsym, o, types.TypeHash(typ)) // copy of type hash
o += 4 // skip unused field
if !completeItab {
// If typ doesn't implement iface, make method entries be zero.
o = objw.Uintptr(lsym, o, 0)
entries = entries[:0]
}
for _, fn := range entries {
if !completeItab {
// If typ doesn't implement iface, make method entries be zero.
o = objw.Uintptr(lsym, o, 0)
} else {
o = objw.SymPtrWeak(lsym, o, fn, 0) // method pointer for each method
}
o = objw.SymPtrWeak(lsym, o, fn, 0) // method pointer for each method
}
// Nothing writes static itabs, so they are read only.
objw.Global(lsym, int32(o), int16(obj.DUPOK|obj.RODATA))
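The fun[0]==0 convention called out in the new comment is what lets a partially filled itab serve as a negative cache: consumers probe the first method slot before trusting the table. Roughly, on the consuming side (a sketch of the idea, not the actual runtime source):

	if tab == nil || tab.fun[0] == 0 {
		// typ does not implement inter; the assertion or itab lookup fails
	}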
@@ -1821,13 +1821,17 @@ func NeedEmit(typ *types.Type) bool {
// Also wraps methods on instantiated generic types for use in itab entries.
// For an instantiated generic type G[int], we generate wrappers like:
// G[int] pointer shaped:
//
// func (x G[int]) f(arg) {
// .inst.G[int].f(dictionary, x, arg)
// }
// }
//
// G[int] not pointer shaped:
//
// func (x *G[int]) f(arg) {
// .inst.G[int].f(dictionary, *x, arg)
// }
// }
//
// These wrappers are always fully stenciled.
func methodWrapper(rcvr *types.Type, method *types.Field, forItab bool) *obj.LSym {
orig := rcvr

View file

@@ -19,7 +19,7 @@ func zeroRange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
}
// Adjust the frame to account for LR.
off += base.Ctxt.FixedFrameSize()
off += base.Ctxt.Arch.FixedFrameSize
if cnt < int64(4*types.PtrSize) {
for i := int64(0); i < cnt; i += int64(types.PtrSize) {

View file

@@ -237,7 +237,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
for _, a := range v.Block.Func.RegArgs {
// Pass the spill/unspill information along to the assembler, offset by size of
// the saved LR slot.
addr := ssagen.SpillSlotAddr(a, riscv.REG_SP, base.Ctxt.FixedFrameSize())
addr := ssagen.SpillSlotAddr(a, riscv.REG_SP, base.Ctxt.Arch.FixedFrameSize)
s.FuncInfo().AddSpill(
obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
}
@@ -669,7 +669,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(riscv.AMOV)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

View file

@@ -24,7 +24,7 @@ func zerorange(pp *objw.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog
}
// Adjust the frame to account for LR.
off += base.Ctxt.FixedFrameSize()
off += base.Ctxt.Arch.FixedFrameSize
reg := int16(s390x.REGSP)
// If the off cannot fit in a 12-bit unsigned displacement then we

View file

@@ -132,7 +132,9 @@ func moveByType(t *types.Type) obj.As {
}
// opregreg emits instructions for
// dest := dest(To) op src(From)
//
// dest := dest(To) op src(From)
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
@@ -145,7 +147,9 @@ func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
}
// opregregimm emits instructions for
//
// dest := src(From) op off
//
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregregimm(s *ssagen.State, op obj.As, dest, src int16, off int64) *obj.Prog {
@@ -546,7 +550,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// caller's SP is FixedFrameSize below the address of the first arg
p := s.Prog(s390x.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Offset = -base.Ctxt.FixedFrameSize()
p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
p.From.Name = obj.NAME_PARAM
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

View file

@@ -131,10 +131,14 @@ var needSplit = map[Op]bool{
}
// For each entry k, v in this map, if we have a value x with:
// x.Op == k[0]
// x.Args[0].Op == k[1]
//
// x.Op == k[0]
// x.Args[0].Op == k[1]
//
// then we can set x.Op to v and set x.Args like this:
// x.Args[0].Args + x.Args[1:]
//
// x.Args[0].Args + x.Args[1:]
//
// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
var combine = map[[2]Op]Op{
// amd64
@@ -340,11 +344,18 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ1}: OpAMD64DIVSDloadidx1,
[2]Op{OpAMD64DIVSDload, OpAMD64LEAQ8}: OpAMD64DIVSDloadidx8,
[2]Op{OpAMD64SARXLload, OpAMD64ADDQ}: OpAMD64SARXLloadidx1,
[2]Op{OpAMD64SARXQload, OpAMD64ADDQ}: OpAMD64SARXQloadidx1,
[2]Op{OpAMD64SHLXLload, OpAMD64ADDQ}: OpAMD64SHLXLloadidx1,
[2]Op{OpAMD64SHLXQload, OpAMD64ADDQ}: OpAMD64SHLXQloadidx1,
[2]Op{OpAMD64SHRXLload, OpAMD64ADDQ}: OpAMD64SHRXLloadidx1,
[2]Op{OpAMD64SHRXQload, OpAMD64ADDQ}: OpAMD64SHRXQloadidx1,
[2]Op{OpAMD64SARXLload, OpAMD64LEAQ1}: OpAMD64SARXLloadidx1,
[2]Op{OpAMD64SARXLload, OpAMD64LEAQ4}: OpAMD64SARXLloadidx4,
[2]Op{OpAMD64SARXLload, OpAMD64LEAQ8}: OpAMD64SARXLloadidx8,
[2]Op{OpAMD64SARXQload, OpAMD64LEAQ1}: OpAMD64SARXQloadidx1,
[2]Op{OpAMD64SARXQload, OpAMD64LEAQ8}: OpAMD64SARXQloadidx8,
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ1}: OpAMD64SHLXLloadidx1,
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ4}: OpAMD64SHLXLloadidx4,
[2]Op{OpAMD64SHLXLload, OpAMD64LEAQ8}: OpAMD64SHLXLloadidx8,
@@ -356,6 +367,26 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64SHRXQload, OpAMD64LEAQ1}: OpAMD64SHRXQloadidx1,
[2]Op{OpAMD64SHRXQload, OpAMD64LEAQ8}: OpAMD64SHRXQloadidx8,
// amd64/v3
[2]Op{OpAMD64MOVBELload, OpAMD64ADDQ}: OpAMD64MOVBELloadidx1,
[2]Op{OpAMD64MOVBEQload, OpAMD64ADDQ}: OpAMD64MOVBEQloadidx1,
[2]Op{OpAMD64MOVBELload, OpAMD64LEAQ1}: OpAMD64MOVBELloadidx1,
[2]Op{OpAMD64MOVBELload, OpAMD64LEAQ4}: OpAMD64MOVBELloadidx4,
[2]Op{OpAMD64MOVBELload, OpAMD64LEAQ8}: OpAMD64MOVBELloadidx8,
[2]Op{OpAMD64MOVBEQload, OpAMD64LEAQ1}: OpAMD64MOVBEQloadidx1,
[2]Op{OpAMD64MOVBEQload, OpAMD64LEAQ8}: OpAMD64MOVBEQloadidx8,
[2]Op{OpAMD64MOVBEWstore, OpAMD64ADDQ}: OpAMD64MOVBEWstoreidx1,
[2]Op{OpAMD64MOVBELstore, OpAMD64ADDQ}: OpAMD64MOVBELstoreidx1,
[2]Op{OpAMD64MOVBEQstore, OpAMD64ADDQ}: OpAMD64MOVBEQstoreidx1,
[2]Op{OpAMD64MOVBEWstore, OpAMD64LEAQ1}: OpAMD64MOVBEWstoreidx1,
[2]Op{OpAMD64MOVBEWstore, OpAMD64LEAQ2}: OpAMD64MOVBEWstoreidx2,
[2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ1}: OpAMD64MOVBELstoreidx1,
[2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ4}: OpAMD64MOVBELstoreidx4,
[2]Op{OpAMD64MOVBELstore, OpAMD64LEAQ8}: OpAMD64MOVBELstoreidx8,
[2]Op{OpAMD64MOVBEQstore, OpAMD64LEAQ1}: OpAMD64MOVBEQstoreidx1,
[2]Op{OpAMD64MOVBEQstore, OpAMD64LEAQ8}: OpAMD64MOVBEQstoreidx8,
// 386
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,

View file

@@ -71,19 +71,25 @@ type Block struct {
// Edge represents a CFG edge.
// Example edges for b branching to either c or d.
// (c and d have other predecessors.)
// b.Succs = [{c,3}, {d,1}]
// c.Preds = [?, ?, ?, {b,0}]
// d.Preds = [?, {b,1}, ?]
//
// b.Succs = [{c,3}, {d,1}]
// c.Preds = [?, ?, ?, {b,0}]
// d.Preds = [?, {b,1}, ?]
//
// These indexes allow us to edit the CFG in constant time.
// In addition, it informs phi ops in degenerate cases like:
// b:
// if k then c else c
// c:
// v = Phi(x, y)
//
// b:
// if k then c else c
// c:
// v = Phi(x, y)
//
// Then the indexes tell you whether x is chosen from
// the if or else branch from b.
// b.Succs = [{c,0},{c,1}]
// c.Preds = [{b,0},{b,1}]
//
// b.Succs = [{c,0},{c,1}]
// c.Preds = [{b,0},{b,1}]
//
// means x is chosen if k is true.
type Edge struct {
// block edge goes to (in a Succs list) or from (in a Preds list)
@@ -106,12 +112,13 @@ func (e Edge) String() string {
}
// BlockKind is the kind of SSA block.
// kind controls successors
// ------------------------------------------
// Exit [return mem] []
// Plain [] [next]
// If [boolean Value] [then, else]
// Defer [mem] [nopanic, panic] (control opcode should be OpStaticCall to runtime.deferproc)
//
// kind controls successors
// ------------------------------------------
// Exit [return mem] []
// Plain [] [next]
// If [boolean Value] [then, else]
// Defer [mem] [nopanic, panic] (control opcode should be OpStaticCall to runtime.deferproc)
type BlockKind int8
// short form print
@@ -330,10 +337,12 @@ func (b *Block) swapSuccessors() {
//
// b.removePred(i)
// for _, v := range b.Values {
// if v.Op != OpPhi {
// continue
// }
// b.removeArg(v, i)
//
// if v.Op != OpPhi {
// continue
// }
// b.removeArg(v, i)
//
// }
func (b *Block) removePhiArg(phi *Value, i int) {
n := len(b.Preds)

View file

@@ -11,11 +11,11 @@ import "cmd/internal/src"
//
// Search for basic blocks that look like
//
// bb0 bb0
// | \ / \
// | bb1 or bb1 bb2 <- trivial if/else blocks
// | / \ /
// bb2 bb3
// bb0 bb0
// | \ / \
// | bb1 or bb1 bb2 <- trivial if/else blocks
// | / \ /
// bb2 bb3
//
// where the intermediate blocks are mostly empty (with no side-effects);
// rewrite Phis in the postdominator as CondSelects.

View file

@@ -100,6 +100,10 @@ func checkFunc(f *Func) {
if b.NumControls() != 0 {
f.Fatalf("plain/dead block %s has a control value", b)
}
case BlockJumpTable:
if b.NumControls() != 1 {
f.Fatalf("jumpTable block %s has no control value", b)
}
}
if len(b.Succs) != 2 && b.Likely != BranchUnknown {
f.Fatalf("likeliness prediction %d for block %s with %d successors", b.Likely, b, len(b.Succs))

View file

@@ -250,8 +250,8 @@ var GenssaDump map[string]bool = make(map[string]bool) // names of functions to
// version is used as a regular expression to match the phase name(s).
//
// Special cases that have turned out to be useful:
// - ssa/check/on enables checking after each phase
// - ssa/all/time enables time reporting for all phases
// - ssa/check/on enables checking after each phase
// - ssa/all/time enables time reporting for all phases
//
// See gc/lex.go for dissection of the option string.
// Example uses:
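One plausible invocation of the flag syntax described here (the file name is hypothetical):

	go tool compile -d=ssa/check/on main.go

which enables the SSA checker after every phase while compiling main.go.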

View file

@@ -168,6 +168,9 @@ type Frontend interface {
// MyImportPath provides the import name (roughly, the package) for the function being compiled.
MyImportPath() string
// LSym returns the linker symbol of the function being compiled.
LSym() string
}
// NewConfig returns a new configuration object for the given architecture.
@@ -297,8 +300,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
c.registers = registersRISCV64[:]
c.gpRegMask = gpRegMaskRISCV64
c.fpRegMask = fpRegMaskRISCV64
// c.intParamRegs = paramIntRegRISCV64
// c.floatParamRegs = paramFloatRegRISCV64
c.intParamRegs = paramIntRegRISCV64
c.floatParamRegs = paramFloatRegRISCV64
c.FPReg = framepointerRegRISCV64
c.hasGReg = true
case "wasm":
@@ -329,8 +332,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
c.floatParamRegs = nil // no FP registers in softfloat mode
}
c.ABI0 = abi.NewABIConfig(0, 0, ctxt.FixedFrameSize())
c.ABI1 = abi.NewABIConfig(len(c.intParamRegs), len(c.floatParamRegs), ctxt.FixedFrameSize())
c.ABI0 = abi.NewABIConfig(0, 0, ctxt.Arch.FixedFrameSize)
c.ABI1 = abi.NewABIConfig(len(c.intParamRegs), len(c.floatParamRegs), ctxt.Arch.FixedFrameSize)
// On Plan 9, floating point operations are not allowed in note handler.
if buildcfg.GOOS == "plan9" {

View file

@@ -235,14 +235,15 @@ type eqclass []*Value
// partitionValues partitions the values into equivalence classes
// based on having all the following features match:
// - opcode
// - type
// - auxint
// - aux
// - nargs
// - block # if a phi op
// - first two arg's opcodes and auxint
// - NOT first two arg's aux; that can break CSE.
// - opcode
// - type
// - auxint
// - aux
// - nargs
// - block # if a phi op
// - first two arg's opcodes and auxint
// - NOT first two arg's aux; that can break CSE.
//
// partitionValues returns a list of equivalence classes, each
// being a sorted by ID list of *Values. The eqclass slices are
// backed by the same storage as the input slice.

View file

@@ -402,28 +402,28 @@ func (sc *slotCanonicalizer) canonSlot(idx SlKeyIdx) LocalSlot {
// OpArg{Int,Float}Reg values, inserting additional values in
// cases where they are missing. Example:
//
// func foo(s string, used int, notused int) int {
// return len(s) + used
// }
// func foo(s string, used int, notused int) int {
// return len(s) + used
// }
//
// In the function above, the incoming parameter "used" is fully live,
// "notused" is not live, and "s" is partially live (only the length
// field of the string is used). At the point where debug value
// analysis runs, we might expect to see an entry block with:
//
// b1:
// v4 = ArgIntReg <uintptr> {s+8} [0] : BX
// v5 = ArgIntReg <int> {used} [0] : CX
// b1:
// v4 = ArgIntReg <uintptr> {s+8} [0] : BX
// v5 = ArgIntReg <int> {used} [0] : CX
//
// While this is an accurate picture of the live incoming params,
// we also want to have debug locations for non-live params (or
// their non-live pieces), e.g. something like
//
// b1:
// v9 = ArgIntReg <*uint8> {s+0} [0] : AX
// v4 = ArgIntReg <uintptr> {s+8} [0] : BX
// v5 = ArgIntReg <int> {used} [0] : CX
// v10 = ArgIntReg <int> {unused} [0] : DI
// b1:
// v9 = ArgIntReg <*uint8> {s+0} [0] : AX
// v4 = ArgIntReg <uintptr> {s+8} [0] : BX
// v5 = ArgIntReg <int> {used} [0] : CX
// v10 = ArgIntReg <int> {unused} [0] : DI
//
// This function examines the live OpArg{Int,Float}Reg values and
// synthesizes new (dead) values for the non-live params or the
@@ -1489,14 +1489,14 @@ func setupLocList(ctxt *obj.Link, f *Func, list []byte, st, en ID) ([]byte, int)
// that spills a register arg. It returns the ID of that instruction
// Example:
//
// b1:
// v3 = ArgIntReg <int> {p1+0} [0] : AX
// ... more arg regs ..
// v4 = ArgFloatReg <float32> {f1+0} [0] : X0
// v52 = MOVQstore <mem> {p1} v2 v3 v1
// ... more stores ...
// v68 = MOVSSstore <mem> {f4} v2 v67 v66
// v38 = MOVQstoreconst <mem> {blob} [val=0,off=0] v2 v32
// b1:
// v3 = ArgIntReg <int> {p1+0} [0] : AX
// ... more arg regs ..
// v4 = ArgFloatReg <float32> {f1+0} [0] : X0
// v52 = MOVQstore <mem> {p1} v2 v3 v1
// ... more stores ...
// v68 = MOVSSstore <mem> {f4} v2 v67 v66
// v38 = MOVQstoreconst <mem> {blob} [val=0,off=0] v2 v32
//
// Important: locatePrologEnd is expected to work properly only with
// optimization turned off (e.g. "-N"). If optimization is enabled

View file

@@ -84,7 +84,7 @@ var optimizedLibs = (!strings.Contains(gogcflags, "-N") && !strings.Contains(gog
// "O" is an explicit indication that we expect it to be optimized out.
// For example:
//
// if len(os.Args) > 1 { //gdb-dbg=(hist/A,cannedInput/A) //dlv-dbg=(hist/A,cannedInput/A)
// if len(os.Args) > 1 { //gdb-dbg=(hist/A,cannedInput/A) //dlv-dbg=(hist/A,cannedInput/A)
//
// TODO: not implemented for Delve yet, but this is the plan
//

View file

@@ -656,15 +656,16 @@ outer:
// It decomposes a Load or an Arg into smaller parts and returns the new mem.
// If the type does not match one of the expected aggregate types, it returns nil instead.
// Parameters:
// pos -- the location of any generated code.
// b -- the block into which any generated code should normally be placed
// source -- the value, possibly an aggregate, to be stored.
// mem -- the mem flowing into this decomposition (loads depend on it, stores update it)
// t -- the type of the value to be stored
// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset
// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg.
// storeRc -- storeRC; if the value is stored in registers, this specifies the registers.
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
//
// pos -- the location of any generated code.
// b -- the block into which any generated code should normally be placed
// source -- the value, possibly an aggregate, to be stored.
// mem -- the mem flowing into this decomposition (loads depend on it, stores update it)
// t -- the type of the value to be stored
// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + storeOffset
// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg.
// storeRc -- storeRC; if the value is stored in registers, this specifies the registers.
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
func (x *expandState) decomposeArg(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
pa := x.prAssignForArg(source)
@@ -777,15 +778,16 @@ func (x *expandState) splitSlotsIntoNames(locs []*LocalSlot, suffix string, off
// It decomposes a Load into smaller parts and returns the new mem.
// If the type does not match one of the expected aggregate types, it returns nil instead.
// Parameters:
// pos -- the location of any generated code.
// b -- the block into which any generated code should normally be placed
// source -- the value, possibly an aggregate, to be stored.
// mem -- the mem flowing into this decomposition (loads depend on it, stores update it)
// t -- the type of the value to be stored
// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + offset
// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg.
// storeRc -- storeRC; if the value is stored in registers, this specifies the registers.
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
//
// pos -- the location of any generated code.
// b -- the block into which any generated code should normally be placed
// source -- the value, possibly an aggregate, to be stored.
// mem -- the mem flowing into this decomposition (loads depend on it, stores update it)
// t -- the type of the value to be stored
// storeOffset -- if the value is stored in memory, it is stored at base (see storeRc) + offset
// loadRegOffset -- regarding source as a value in registers, the register offset in ABI1. Meaningful only if source is OpArg.
// storeRc -- storeRC; if the value is stored in registers, this specifies the registers.
// StoreRc also identifies whether the target is registers or memory, and has the base for the store operation.
//
// TODO -- this needs cleanup; it just works for SSA-able aggregates, and won't fully generalize to register-args aggregates.
func (x *expandState) decomposeLoad(pos src.XPos, b *Block, source, mem *Value, t *types.Type, storeOffset int64, loadRegOffset Abi1RO, storeRc registerCursor) *Value {
@@ -1106,7 +1108,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) {
a0 := a.Args[0]
if a0.Op == OpLocalAddr {
n := a0.Aux.(*ir.Name)
if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.FixedFrameSize() == aOffset {
if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset {
continue
}
}
@@ -1127,7 +1129,7 @@ func (x *expandState) rewriteArgs(v *Value, firstArg int) {
// It's common for a tail call passing the same arguments (e.g. method wrapper),
// so this would be a self copy. Detect this and optimize it out.
n := a.Aux.(*ir.Name)
if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.FixedFrameSize() == aOffset {
if n.Class == ir.PPARAM && n.FrameOffset()+x.f.Config.ctxt.Arch.FixedFrameSize == aOffset {
continue
}
}

View file

@@ -102,6 +102,9 @@ func (d TestFrontend) Debug_checknil() bool { retu
func (d TestFrontend) MyImportPath() string {
return "my/import/path"
}
func (d TestFrontend) LSym() string {
return "my/import/path.function"
}
var testTypes Types

View file

@@ -820,17 +820,22 @@ func (f *Func) invalidateCFG() {
}
// DebugHashMatch reports whether environment variable evname
// 1) is empty (this is a special more-quickly implemented case of 3)
// 2) is "y" or "Y"
// 3) is a suffix of the sha1 hash of name
// 4) is a suffix of the environment variable
// 1. is empty (this is a special more-quickly implemented case of 3)
// 2. is "y" or "Y"
// 3. is a suffix of the sha1 hash of name
// 4. is a suffix of the environment variable
// fmt.Sprintf("%s%d", evname, n)
// provided that all such variables are nonempty for 0 <= i <= n
//
// Otherwise it returns false.
// When true is returned the message
// "%s triggered %s\n", evname, name
//
// "%s triggered %s\n", evname, name
//
// is printed on the file named in environment variable
// GSHS_LOGFILE
//
// GSHS_LOGFILE
//
// or standard out if that is empty or there is an error
// opening the file.
func (f *Func) DebugHashMatch(evname string) bool {
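To illustrate the four rules (GOSSAHASH is the variable name conventionally passed as evname in the compiler; the concrete values below are made up):

	GOSSAHASH=       every function matches (rule 1)
	GOSSAHASH=y      every function matches (rule 2)
	GOSSAHASH=0110   only functions whose name's sha1 hash, written in
	                 binary, ends in 0110 (rule 3)

Rule 4 then lets GOSSAHASH0, GOSSAHASH1, ... select further independent subsets, which is handy when bisecting a miscompile down to a single function.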

View file

@@ -55,19 +55,21 @@ func fuse(f *Func, typ fuseType) {
// fuseBlockIf handles the following cases where s0 and s1 are empty blocks.
//
// b b b b
// \ / \ / | \ / \ / | | |
// s0 s1 | s1 s0 | | |
// \ / | / \ | | |
// ss ss ss ss
// b b b b
// \ / \ / | \ / \ / | | |
// s0 s1 | s1 s0 | | |
// \ / | / \ | | |
// ss ss ss ss
//
// If all Phi ops in ss have identical variables for slots corresponding to
// s0, s1 and b then the branch can be dropped.
// This optimization often comes up in switch statements with multiple
// expressions in a case clause:
// switch n {
// case 1,2,3: return 4
// }
//
// switch n {
// case 1,2,3: return 4
// }
//
// TODO: If ss doesn't contain any OpPhis, are s0 and s1 dead code anyway?
func fuseBlockIf(b *Block) bool {
if b.Kind != BlockIf {

View file

@@ -8,21 +8,24 @@ package ssa
// of an If block can be derived from its predecessor If block, in
// some such cases, we can redirect the predecessor If block to the
// corresponding successor block directly. For example:
// p:
// v11 = Less64 <bool> v10 v8
// If v11 goto b else u
// b: <- p ...
// v17 = Leq64 <bool> v10 v8
// If v17 goto s else o
//
// p:
// v11 = Less64 <bool> v10 v8
// If v11 goto b else u
// b: <- p ...
// v17 = Leq64 <bool> v10 v8
// If v17 goto s else o
//
// We can redirect p to s directly.
//
// The implementation here borrows the framework of the prove pass.
// 1, Traverse all blocks of function f to find If blocks.
// 2, For any If block b, traverse all its predecessors to find If blocks.
// 3, For any If block predecessor p, update relationship p->b.
// 4, Traverse all successors of b.
// 5, For any successor s of b, try to update relationship b->s; if a
// contradiction is found, then redirect p to another successor of b.
//
// 1, Traverse all blocks of function f to find If blocks.
// 2, For any If block b, traverse all its predecessors to find If blocks.
// 3, For any If block predecessor p, update relationship p->b.
// 4, Traverse all successors of b.
// 5, For any successor s of b, try to update relationship b->s; if a
// contradiction is found, then redirect p to another successor of b.
func fuseBranchRedirect(f *Func) bool {
ft := newFactsTable(f)
ft.checkpoint()

View file

@@ -9,22 +9,22 @@ package ssa
//
// Look for branch structure like:
//
// p
// |\
// | b
// |/ \
// s0 s1
// p
// |\
// | b
// |/ \
// s0 s1
//
// In our example, p has control '1 <= x', b has control 'x < 5',
// and s0 and s1 are the if and else results of the comparison.
//
// This will be optimized into:
//
// p
// \
// b
// / \
// s0 s1
// p
// \
// b
// / \
// s0 s1
//
// where b has the combined control value 'unsigned(x-1) < 4'.
// Later passes will then fuse p and b.
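The combined control relies on a standard unsigned-comparison trick: for any integer x, 1 <= x && x < 5 holds exactly when uint(x-1) < 4, because the subtraction wraps values below 1 around to very large unsigned numbers. A standalone spot check, independent of the compiler:

	package main

	import "fmt"

	func main() {
		for _, x := range []int{-1, 0, 1, 3, 4, 5} {
			fmt.Println((1 <= x && x < 5) == (uint(x-1) < 4)) // true every time
		}
	}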

View file

@@ -206,6 +206,11 @@
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
// Prefer SARX/SHLX/SHRX instruction because it has less register restriction on the shift input.
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
// Lowering integer comparisons
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
@@ -512,6 +517,8 @@
(If cond yes no) => (NE (TESTB cond cond) yes no)
(JumpTable idx) => (JUMPTABLE {makeJumpTableSym(b)} idx (LEAQ <typ.Uintptr> {makeJumpTableSym(b)} (SB)))
// Atomic loads. Other than preserving their ordering with respect to other loads, nothing special here.
(AtomicLoad8 ptr mem) => (MOVBatomicload ptr mem)
(AtomicLoad32 ptr mem) => (MOVLatomicload ptr mem)
@@ -590,6 +597,8 @@
// mutandis, for UGE and SETAE, and CC and SETCC.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> ((ULT|UGE) (BTLconst [int8(log32(c))] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -598,6 +607,8 @@
=> ((ULT|UGE) (BTQconst [int8(log64(c))] x))
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE) (BTLconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@@ -609,6 +620,10 @@
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
@@ -621,9 +636,10 @@
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) => (BTL y x)
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
@@ -635,6 +651,8 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@@ -650,6 +668,8 @@
// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRQconst [int8(log32(^c))] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
@@ -791,6 +811,8 @@
(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
@@ -798,33 +820,36 @@
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
(SARW x (MOV(Q|L)const [c])) => (SARWconst [int8(min(int64(c)&31,15))] x)
(SARB x (MOV(Q|L)const [c])) => (SARBconst [int8(min(int64(c)&31,7))] x)
(SARXQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
((SHLQ|SHRQ|SARQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x y)
((SHLQ|SHRQ|SARQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL) x y)
((SHLL|SHRL|SARL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
// Constant rotate instructions
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
@@ -856,9 +881,13 @@
// it in order to strip it out.
(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
(ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
(ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
(ORL (SHLXL x y) (ANDL (SHRXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
(ORL (SHRXL x y) (ANDL (SHLXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
// Help with rotate detection
(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
@@ -873,6 +902,15 @@
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
&& v.Type.Size() == 2
=> (RORW x y)
(ORL (SHLXL x (AND(Q|L)const y [15]))
(ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
&& v.Type.Size() == 2
=> (ROLW x y)
(ORL (SHRW x (AND(Q|L)const y [15]))
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
&& v.Type.Size() == 2
=> (RORW x y)
(ORL (SHLL x (AND(Q|L)const y [ 7]))
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
@@ -883,6 +921,15 @@
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
&& v.Type.Size() == 1
=> (RORB x y)
(ORL (SHLXL x (AND(Q|L)const y [ 7]))
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
&& v.Type.Size() == 1
=> (ROLB x y)
(ORL (SHRB x (AND(Q|L)const y [ 7]))
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
&& v.Type.Size() == 1
=> (RORB x y)
// rotate left negative = rotate right
(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
@@ -916,6 +963,7 @@
// Multi-register shifts
(ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
@@ -2252,5 +2300,10 @@
&& clobber(x0, x1, sh)
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVQconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
((SHL|SHR|SAR)XQload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Qconst [int8(c&63)] (MOVQload [off] {sym} ptr mem))
((SHL|SHR|SAR)XLload [off] {sym} ptr (MOVLconst [c]) mem) => ((SHL|SHR|SAR)Lconst [int8(c&31)] (MOVLload [off] {sym} ptr mem))

View file

@@ -937,13 +937,41 @@ func init() {
{name: "MOVBELstore", argLength: 3, reg: gpstore, asm: "MOVBEL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 4 bytes in arg1 to arg0+auxint+aux. arg2=mem
{name: "MOVBEQload", argLength: 2, reg: gpload, asm: "MOVBEQ", aux: "SymOff", typ: "UInt64", faultOnNilArg0: true, symEffect: "Read"}, // load and swap 8 bytes from arg0+auxint+aux. arg1=mem
{name: "MOVBEQstore", argLength: 3, reg: gpstore, asm: "MOVBEQ", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // swap and store 8 bytes in arg1 to arg0+auxint+aux. arg2=mem
// indexed MOVBE loads
{name: "MOVBELloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+arg1+auxint+aux. arg2=mem. Zero extend.
{name: "MOVBELloadidx4", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 4, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+4*arg1+auxint+aux. arg2=mem. Zero extend.
{name: "MOVBELloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEL", scale: 8, aux: "SymOff", typ: "UInt32", symEffect: "Read"}, // load and swap 4 bytes from arg0+8*arg1+auxint+aux. arg2=mem. Zero extend.
{name: "MOVBEQloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load and swap 8 bytes from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVBEQloadidx8", argLength: 3, reg: gploadidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", typ: "UInt64", symEffect: "Read"}, // load and swap 8 bytes from arg0+8*arg1+auxint+aux. arg2=mem
// indexed MOVBE stores
{name: "MOVBEWstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEW", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 2 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVBEWstoreidx2", argLength: 4, reg: gpstoreidx, asm: "MOVBEW", scale: 2, aux: "SymOff", symEffect: "Write"}, // swap and store 2 bytes in arg2 to arg0+2*arg1+auxint+aux. arg3=mem
{name: "MOVBELstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEL", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVBELstoreidx4", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 4, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+4*arg1+auxint+aux. arg3=mem
{name: "MOVBELstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEL", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 4 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
{name: "MOVBEQstoreidx1", argLength: 4, reg: gpstoreidx, commutative: true, asm: "MOVBEQ", scale: 1, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+arg1+auxint+aux. arg3=mem
{name: "MOVBEQstoreidx8", argLength: 4, reg: gpstoreidx, asm: "MOVBEQ", scale: 8, aux: "SymOff", symEffect: "Write"}, // swap and store 8 bytes in arg2 to arg0+8*arg1+auxint+aux. arg3=mem
// CPUID feature: BMI2.
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
{name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64
{name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32
{name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64
{name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32
{name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
{name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
{name: "SHLXLload", argLength: 3, reg: gp21shxload, asm: "SHLXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 32
{name: "SHLXQload", argLength: 3, reg: gp21shxload, asm: "SHLXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+auxint+aux) << arg1, arg2=mem, shift amount is mod 64
{name: "SHRXLload", argLength: 3, reg: gp21shxload, asm: "SHRXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
{name: "SHRXQload", argLength: 3, reg: gp21shxload, asm: "SHRXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // unsigned *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64
{name: "SARXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
{name: "SARXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+4*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
{name: "SARXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 32
{name: "SARXQloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 1, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+1*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
{name: "SARXQloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SARXQ", scale: 8, aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+8*arg1+auxint+aux) >> arg2, arg3=mem, shift amount is mod 64
{name: "SHLXLloadidx1", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 1, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+1*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
{name: "SHLXLloadidx4", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 4, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+4*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
{name: "SHLXLloadidx8", argLength: 4, reg: gp21shxloadidx, asm: "SHLXL", scale: 8, aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // *(arg0+8*arg1+auxint+aux) << arg2, arg3=mem, shift amount is mod 32
@@ -973,6 +1001,12 @@ func init() {
{name: "NEF", controls: 1},
{name: "ORD", controls: 1}, // FP, ordered comparison (parity zero)
{name: "NAN", controls: 1}, // FP, unordered comparison (parity one)
// JUMPTABLE implements jump tables.
// Aux is the symbol (an *obj.LSym) for the jump table.
// control[0] is the index into the jump table.
// control[1] is the address of the jump table (the address of the symbol stored in Aux).
{name: "JUMPTABLE", controls: 2, aux: "Sym"},
}
archs = append(archs, arch{
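For context, a JUMPTABLE block lets a dense switch compile to a single indexed branch instead of a comparison chain. The intended source shape is roughly the following (illustrative only; the profitability heuristics live elsewhere in the compiler, and f0..f3 are placeholder functions):

	switch x {
	case 0:
		f0()
	case 1:
		f1()
	case 2:
		f2()
	case 3:
		f3()
	}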

View file

@@ -515,15 +515,18 @@
// simplifications
(Or(64|32|16|8) x x) => x
(Or(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
(Or(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
(Or(64|32|16|8) (Const(64|32|16|8) [-1]) _) => (Const(64|32|16|8) [-1])
(Or(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
(And(64|32|16|8) x x) => x
(And(64|32|16|8) (Const(64|32|16|8) [-1]) x) => x
(And(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0])
(And(64|32|16|8) (Const(64|32|16|8) [0]) _) => (Const(64|32|16|8) [0])
(And(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [0])
(Xor(64|32|16|8) x x) => (Const(64|32|16|8) [0])
(Xor(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
(Xor(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
(Add(64|32|16|8) (Const(64|32|16|8) [0]) x) => x
(Sub(64|32|16|8) x x) => (Const(64|32|16|8) [0])
@@ -533,6 +536,13 @@
(Com(64|32|16|8) (Const(64|32|16|8) [c])) => (Const(64|32|16|8) [^c])
(Neg(64|32|16|8) (Sub(64|32|16|8) x y)) => (Sub(64|32|16|8) y x)
(Add(64|32|16|8) x (Neg(64|32|16|8) y)) => (Sub(64|32|16|8) x y)
(Xor(64|32|16|8) (Const(64|32|16|8) [-1]) x) => (Com(64|32|16|8) x)
(Sub(64|32|16|8) (Neg(64|32|16|8) x) (Com(64|32|16|8) x)) => (Const(64|32|16|8) [1])
(Sub(64|32|16|8) (Com(64|32|16|8) x) (Neg(64|32|16|8) x)) => (Const(64|32|16|8) [-1])
(Add(64|32|16|8) (Com(64|32|16|8) x) x) => (Const(64|32|16|8) [-1])
// ^(x-1) == ^x+1 == -x
(Add(64|32|16|8) (Const(64|32|16|8) [1]) (Com(64|32|16|8) x)) => (Neg(64|32|16|8) x)
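As a concrete instance of that identity, using plain two's-complement arithmetic: for x = 5, ^(x-1) = ^4 = -5, and ^x+1 = -6+1 = -5, both equal to -x.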

View file

@@ -639,12 +639,13 @@ var genericOps = []opData{
// First [] [always, never]
var genericBlocks = []blockData{
{name: "Plain"}, // a single successor
{name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
{name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
{name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
{name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
{name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
{name: "Plain"}, // a single successor
{name: "If", controls: 1}, // if Controls[0] goto Succs[0] else goto Succs[1]
{name: "Defer", controls: 1}, // Succs[0]=defer queued, Succs[1]=defer recovered. Controls[0] is call op (of memory type)
{name: "Ret", controls: 1}, // no successors, Controls[0] value is memory result
{name: "RetJmp", controls: 1}, // no successors, Controls[0] value is a tail call
{name: "Exit", controls: 1}, // no successors, Controls[0] value generates a panic
{name: "JumpTable", controls: 1}, // multiple successors, the integer Controls[0] selects which one
// transient block state used for dead code removal
{name: "First"}, // 2 successors, always takes the first one (second is dead)

View file

@@ -1838,6 +1838,8 @@ func (op opData) auxIntType() string {
// auxType returns the Go type that this block should store in its aux field.
func (b blockData) auxType() string {
switch b.aux {
case "Sym":
return "Sym"
case "S390XCCMask", "S390XCCMaskInt8", "S390XCCMaskUint8":
return "s390x.CCMask"
case "S390XRotateParams":

View file

@@ -46,19 +46,19 @@ func (r *Register) GCNum() int16 {
// variable that has been decomposed into multiple stack slots.
// As an example, a string could have the following configurations:
//
// stack layout LocalSlots
//
// Optimizations are disabled. s is on the stack and represented in its entirety.
// [ ------- s string ---- ] { N: s, Type: string, Off: 0 }
//
// s was not decomposed, but the SSA operates on its parts individually, so
// there is a LocalSlot for each of its fields that points into the single stack slot.
// [ ------- s string ---- ] { N: s, Type: *uint8, Off: 0 }, {N: s, Type: int, Off: 8}
//
// s was decomposed. Each of its fields is in its own stack slot and has its own LocalSlot.
// [ ptr *uint8 ] [ len int] { N: ptr, Type: *uint8, Off: 0, SplitOf: parent, SplitOffset: 0},
//                           { N: len, Type: int, Off: 0, SplitOf: parent, SplitOffset: 8}
// parent = &{N: s, Type: string}
type LocalSlot struct {
N *ir.Name // an ONAME *ir.Name representing a stack location.
Type *types.Type // type of slot
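The three configurations above all come down to a string being a two-word header. A standalone illustration of that layout (the stringHeader type here is a local stand-in mirroring the runtime's representation, not an exported API):

package main

import (
	"fmt"
	"unsafe"
)

// stringHeader mirrors the two-word layout that makes a string variable
// decomposable into a *uint8 slot at offset 0 and an int slot at offset 8
// on 64-bit targets.
type stringHeader struct {
	data unsafe.Pointer
	len  int
}

func main() {
	s := "hello"
	h := (*stringHeader)(unsafe.Pointer(&s))
	fmt.Println(h.len, *(*byte)(h.data)) // prints: 5 104 ('h')
}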

View file

@ -31,9 +31,10 @@ type indVar struct {
// parseIndVar checks whether the SSA value passed as argument is a valid induction
// variable, and, if so, extracts:
// - the minimum bound
// - the increment value
// - the "next" value (SSA value that is Phi'd into the induction variable every loop)
//
// Currently, we detect induction variables that match (Phi min nxt),
// with nxt being (Add inc ind).
// If it can't parse the induction variable correctly, it returns (nil, nil, nil).
@ -66,19 +67,18 @@ func parseIndVar(ind *Value) (min, inc, nxt *Value) {
//
// Look for variables and blocks that satisfy the following
//
// loop:
//   ind = (Phi min nxt),
//   if ind < max
//     then goto enter_loop
//   else goto exit_loop
//
// enter_loop:
//   do something
//   nxt = inc + ind
//   goto loop
//
// exit_loop:
//
// TODO: handle 32 bit operations
func findIndVar(f *Func) []indVar {
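At the source level, the pattern findIndVar recognizes corresponds to an ordinary counted loop. A hypothetical example with the roles called out:

// sum is a hypothetical example: i is the induction variable, with
// min = 0, inc = 1, max = len(a), and nxt being the i+1 value that the
// loop's Phi merges back in each iteration.
func sum(a []int) int {
	s := 0
	for i := 0; i < len(a); i++ {
		s += a[i]
	}
	return s
}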

View file

@ -8,19 +8,19 @@ package ssa
// to loops with a check-loop-condition-at-end.
// This helps loops avoid extra unnecessary jumps.
//
// loop:
//   CMPQ ...
//   JGE exit
//   ...
//   JMP loop
// exit:
//
//   JMP entry
// loop:
//   ...
// entry:
//   CMPQ ...
//   JLT loop
func loopRotate(f *Func) {
loopnest := f.loopnest()
if loopnest.hasIrreducible {

View file

@ -110,7 +110,8 @@ type umagicData struct {
// umagic computes the constants needed to strength reduce unsigned n-bit divides by the constant uint64(c).
// The return values satisfy for all 0 <= x < 2^n
//
//	floor(x / uint64(c)) = x * (m + 2^n) >> (n+s)
func umagic(n uint, c int64) umagicData {
// Convert from ConstX auxint values to the real uint64 constant they represent.
d := uint64(c) << (64 - n) >> (64 - n)
@ -183,7 +184,8 @@ type smagicData struct {
// magic computes the constants needed to strength reduce signed n-bit divides by the constant c.
// Must have c>0.
// The return values satisfy for all -2^(n-1) <= x < 2^(n-1)
//
//	trunc(x / c) = x * m >> (n+s) + (x < 0 ? 1 : 0)
func smagic(n uint, c int64) smagicData {
C := new(big.Int).SetInt64(c)
s := C.BitLen() - 1
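The unsigned identity can be spot-checked outside the compiler by brute force. A sketch for n=16 and c=7; the choice of s here (the smallest s with 2^s >= c) follows the usual magic-number construction and is an assumption, not code from this package:

package main

import "fmt"

func main() {
	const n = 16
	const c = 7
	s := 0
	for uint64(1)<<s < c {
		s++
	}
	// m = ceil(2^(n+s)/c) - 2^n, so that m+2^n is the multiplier.
	m := (uint64(1)<<(n+s)+c-1)/c - uint64(1)<<n
	for x := uint64(0); x < 1<<n; x++ {
		if (x*(m+1<<n))>>(n+s) != x/c {
			fmt.Println("mismatch at", x)
			return
		}
	}
	fmt.Printf("floor(x/%d) == x*(m+2^%d) >> (%d+%d) for all 16-bit x (m=%d)\n", c, n, n, s, m)
}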

View file

@ -391,9 +391,9 @@ const (
// A Sym represents a symbolic offset from a base register.
// Currently a Sym can be one of 3 things:
// - a *gc.Node, for an offset from SP (the stack pointer)
// - a *obj.LSym, for an offset from SB (the global pointer)
// - nil, for no offset
type Sym interface {
CanBeAnSSASym()
CanBeAnSSAAux()
@ -479,12 +479,13 @@ const (
)
// boundsAPI determines which register arguments a bounds check call should use. For an [a:b:c] slice, we do:
//
//	CMPQ c, cap
//	JA   fail1
//	CMPQ b, c
//	JA   fail2
//	CMPQ a, b
//	JA   fail3
//
// fail1: CALL panicSlice3Acap (c, cap)
// fail2: CALL panicSlice3B (b, c)
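In source terms these are the checks behind a three-index slice expression. A minimal example (which panic helper fires depends on which comparison fails; the function name is illustrative):

// slice3 is a minimal example: s[a:b:c] requires a <= b <= c <= cap(s),
// and the generated code tests the conditions in the reverse order shown
// above, branching to a panicSlice3* helper on failure.
func slice3(s []byte, a, b, c int) []byte {
	return s[a:b:c]
}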

View file

@ -50,6 +50,7 @@ const (
BlockAMD64NEF
BlockAMD64ORD
BlockAMD64NAN
BlockAMD64JUMPTABLE
BlockARMEQ
BlockARMNE
@ -149,6 +150,7 @@ const (
BlockRet
BlockRetJmp
BlockExit
BlockJumpTable
BlockFirst
)
@ -172,22 +174,23 @@ var blockString = [...]string{
Block386ORD: "ORD",
Block386NAN: "NAN",
BlockAMD64EQ: "EQ",
BlockAMD64NE: "NE",
BlockAMD64LT: "LT",
BlockAMD64LE: "LE",
BlockAMD64GT: "GT",
BlockAMD64GE: "GE",
BlockAMD64OS: "OS",
BlockAMD64OC: "OC",
BlockAMD64ULT: "ULT",
BlockAMD64ULE: "ULE",
BlockAMD64UGT: "UGT",
BlockAMD64UGE: "UGE",
BlockAMD64EQF: "EQF",
BlockAMD64NEF: "NEF",
BlockAMD64ORD: "ORD",
BlockAMD64NAN: "NAN",
BlockAMD64JUMPTABLE: "JUMPTABLE",
BlockARMEQ: "EQ",
BlockARMNE: "NE",
@ -281,13 +284,14 @@ var blockString = [...]string{
BlockS390XCLIJ: "CLIJ",
BlockS390XCLGIJ: "CLGIJ",
BlockPlain: "Plain",
BlockIf: "If",
BlockDefer: "Defer",
BlockRet: "Ret",
BlockRetJmp: "RetJmp",
BlockExit: "Exit",
BlockJumpTable: "JumpTable",
BlockFirst: "First",
}
func (k BlockKind) String() string { return blockString[k] }
@ -1050,10 +1054,35 @@ const (
OpAMD64MOVBELstore
OpAMD64MOVBEQload
OpAMD64MOVBEQstore
OpAMD64MOVBELloadidx1
OpAMD64MOVBELloadidx4
OpAMD64MOVBELloadidx8
OpAMD64MOVBEQloadidx1
OpAMD64MOVBEQloadidx8
OpAMD64MOVBEWstoreidx1
OpAMD64MOVBEWstoreidx2
OpAMD64MOVBELstoreidx1
OpAMD64MOVBELstoreidx4
OpAMD64MOVBELstoreidx8
OpAMD64MOVBEQstoreidx1
OpAMD64MOVBEQstoreidx8
OpAMD64SARXQ
OpAMD64SARXL
OpAMD64SHLXQ
OpAMD64SHLXL
OpAMD64SHRXQ
OpAMD64SHRXL
OpAMD64SARXLload
OpAMD64SARXQload
OpAMD64SHLXLload
OpAMD64SHLXQload
OpAMD64SHRXLload
OpAMD64SHRXQload
OpAMD64SARXLloadidx1
OpAMD64SARXLloadidx4
OpAMD64SARXLloadidx8
OpAMD64SARXQloadidx1
OpAMD64SARXQloadidx8
OpAMD64SHLXLloadidx1
OpAMD64SHLXLloadidx4
OpAMD64SHLXLloadidx8
@ -13910,6 +13939,319 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "MOVBELloadidx1",
auxType: auxSymOff,
argLen: 3,
commutative: true,
symEffect: SymRead,
asm: x86.AMOVBEL,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "MOVBELloadidx4",
auxType: auxSymOff,
argLen: 3,
symEffect: SymRead,
asm: x86.AMOVBEL,
scale: 4,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "MOVBELloadidx8",
auxType: auxSymOff,
argLen: 3,
symEffect: SymRead,
asm: x86.AMOVBEL,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "MOVBEQloadidx1",
auxType: auxSymOff,
argLen: 3,
commutative: true,
symEffect: SymRead,
asm: x86.AMOVBEQ,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "MOVBEQloadidx8",
auxType: auxSymOff,
argLen: 3,
symEffect: SymRead,
asm: x86.AMOVBEQ,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "MOVBEWstoreidx1",
auxType: auxSymOff,
argLen: 4,
commutative: true,
symEffect: SymWrite,
asm: x86.AMOVBEW,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBEWstoreidx2",
auxType: auxSymOff,
argLen: 4,
symEffect: SymWrite,
asm: x86.AMOVBEW,
scale: 2,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBELstoreidx1",
auxType: auxSymOff,
argLen: 4,
commutative: true,
symEffect: SymWrite,
asm: x86.AMOVBEL,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBELstoreidx4",
auxType: auxSymOff,
argLen: 4,
symEffect: SymWrite,
asm: x86.AMOVBEL,
scale: 4,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBELstoreidx8",
auxType: auxSymOff,
argLen: 4,
symEffect: SymWrite,
asm: x86.AMOVBEL,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBEQstoreidx1",
auxType: auxSymOff,
argLen: 4,
commutative: true,
symEffect: SymWrite,
asm: x86.AMOVBEQ,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "MOVBEQstoreidx8",
auxType: auxSymOff,
argLen: 4,
symEffect: SymWrite,
asm: x86.AMOVBEQ,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{2, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "SARXQ",
argLen: 2,
asm: x86.ASARXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXL",
argLen: 2,
asm: x86.ASARXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXQ",
argLen: 2,
asm: x86.ASHLXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXL",
argLen: 2,
asm: x86.ASHLXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHRXQ",
argLen: 2,
asm: x86.ASHRXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHRXL",
argLen: 2,
asm: x86.ASHRXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXLload",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXL,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXQload",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXLload",
auxType: auxSymOff,
@ -13978,6 +14320,101 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "SARXLloadidx1",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXL,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXLloadidx4",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXL,
scale: 4,
reg: regInfo{
inputs: []inputInfo{
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXLloadidx8",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXL,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXQloadidx1",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXQ,
scale: 1,
reg: regInfo{
inputs: []inputInfo{
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXQloadidx8",
auxType: auxSymOff,
argLen: 4,
faultOnNilArg0: true,
symEffect: SymRead,
asm: x86.ASARXQ,
scale: 8,
reg: regInfo{
inputs: []inputInfo{
{2, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXLloadidx1",
auxType: auxSymOff,

View file

@ -8,13 +8,19 @@ package ssa
// A phi is redundant if its arguments are all equal. For
// purposes of counting, ignore the phi itself. Both of
// these phis are redundant:
//
//	v = phi(x,x,x)
//	v = phi(x,v,x,v)
//
// We repeat this process to also catch situations like:
//
//	v = phi(x, phi(x, x), phi(x, v))
//
// TODO: Can we also simplify cases like:
//
//	v = phi(v, w, x)
//	w = phi(v, w, x)
//
// and would that be useful?
func phielim(f *Func) {
for {
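A hypothetical source shape that yields a redundant phi of the first kind:

// f is a hypothetical example: both paths leave x equal to the same
// value v, so the phi merging them is phi(v, v) and can be replaced by
// a copy of v.
func f(v int, cond bool) int {
	x := v
	if cond {
		x = v
	}
	return x
}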

View file

@ -7,20 +7,22 @@ package ssa
// phiopt eliminates boolean Phis based on the previous if.
//
// Main use case is to transform:
//
//	x := false
//	if b {
//		x = true
//	}
//
// into x = b.
//
// In SSA code this appears as
//
//	b0
//	  If b -> b1 b2
//	b1
//	  Plain -> b2
//	b2
//	  x = (OpPhi (ConstBool [true]) (ConstBool [false]))
//
// In this case we can replace x with a copy of b.
func phiopt(f *Func) {

View file

@ -140,11 +140,11 @@ type posetNode struct {
// to record that A<I, A<J, A<K (with no known relation between I,J,K), we create the
// following DAG:
//
//	    A
//	   / \
//	  I  extra
//	      / \
//	     J   K
type poset struct {
lastidx uint32 // last generated dense index
flags uint8 // internal flags

View file

@ -16,6 +16,10 @@ const (
unknown branch = iota
positive
negative
// The outedges from a jump table are jumpTable0,
// jumpTable0+1, jumpTable0+2, etc. There could be an
// arbitrary number, so we can't list them all here.
jumpTable0
)
// relation represents the set of possible relations between
@ -27,17 +31,17 @@ const (
//
// E.g.
//
//	r := relation(...)
//
//	if v < w {
//		newR := r & lt
//	}
//	if v >= w {
//		newR := r & (eq|gt)
//	}
//	if v != w {
//		newR := r & (lt|gt)
//	}
type relation uint
const (
@ -746,19 +750,19 @@ func (ft *factsTable) cleanup(f *Func) {
// By far, the most common redundant pairs are generated by bounds checking.
// For example, for the code:
//
//	a[i] = 4
//	foo(a[i])
//
// The compiler will generate the following code:
//
//	if i >= len(a) {
//		panic("not in bounds")
//	}
//	a[i] = 4
//	if i >= len(a) {
//		panic("not in bounds")
//	}
//	foo(a[i])
//
// The second comparison i >= len(a) is clearly redundant because if the
// else branch of the first comparison is executed, we already know that i < len(a).
@ -940,20 +944,31 @@ func prove(f *Func) {
// getBranch returns the range restrictions added by p
// when reaching b. p is the immediate dominator of b.
func getBranch(sdom SparseTree, p *Block, b *Block) branch {
if p == nil || p.Kind != BlockIf {
if p == nil {
return unknown
}
// If p and p.Succs[0] are dominators it means that every path
// from entry to b passes through p and p.Succs[0]. We care that
// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
// has one predecessor then (apart from the degenerate case),
// there is no path from entry that can reach b through p.Succs[1].
// TODO: how about p->yes->b->yes, i.e. a loop in yes.
if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
return positive
}
if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
return negative
switch p.Kind {
case BlockIf:
// If p and p.Succs[0] are dominators it means that every path
// from entry to b passes through p and p.Succs[0]. We care that
// no path from entry to b passes through p.Succs[1]. If p.Succs[0]
// has one predecessor then (apart from the degenerate case),
// there is no path from entry that can reach b through p.Succs[1].
// TODO: how about p->yes->b->yes, i.e. a loop in yes.
if sdom.IsAncestorEq(p.Succs[0].b, b) && len(p.Succs[0].b.Preds) == 1 {
return positive
}
if sdom.IsAncestorEq(p.Succs[1].b, b) && len(p.Succs[1].b.Preds) == 1 {
return negative
}
case BlockJumpTable:
// TODO: this loop can lead to quadratic behavior, as
// getBranch can be called len(p.Succs) times.
for i, e := range p.Succs {
if sdom.IsAncestorEq(e.b, b) && len(e.b.Preds) == 1 {
return jumpTable0 + branch(i)
}
}
}
return unknown
}
@ -984,11 +999,36 @@ func addIndVarRestrictions(ft *factsTable, b *Block, iv indVar) {
// branching from Block b in direction br.
func addBranchRestrictions(ft *factsTable, b *Block, br branch) {
c := b.Controls[0]
switch br {
case negative:
switch {
case br == negative:
addRestrictions(b, ft, boolean, nil, c, eq)
case positive:
case br == positive:
addRestrictions(b, ft, boolean, nil, c, lt|gt)
case br >= jumpTable0:
idx := br - jumpTable0
val := int64(idx)
if v, off := isConstDelta(c); v != nil {
// Establish the bound on the underlying value we're switching on,
// not on the offset-ed value used as the jump table index.
c = v
val -= off
}
old, ok := ft.limits[c.ID]
if !ok {
old = noLimit
}
ft.limitStack = append(ft.limitStack, limitFact{c.ID, old})
if val < old.min || val > old.max || uint64(val) < old.umin || uint64(val) > old.umax {
ft.unsat = true
if b.Func.pass.debug > 2 {
b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d unsat", b, idx, c, val)
}
} else {
ft.limits[c.ID] = limit{val, val, uint64(val), uint64(val)}
if b.Func.pass.debug > 2 {
b.Func.Warnl(b.Pos, "block=%s outedge=%d %s=%d", b, idx, c, val)
}
}
default:
panic("unknown branch")
}
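The jump-table case above boils down to this: taking outedge idx pins the switched value to the single constant idx (after undoing any constant offset via isConstDelta), or is unsatisfiable if idx falls outside the value's known range. A minimal sketch with stand-in types (limitSketch and jumpTableFact are illustrative names, not this package's):

// limitSketch mirrors the shape of a facts-table limit: signed and
// unsigned bounds on a value.
type limitSketch struct {
	min, max   int64
	umin, umax uint64
}

// jumpTableFact returns the bounds learned on outedge idx and whether
// that edge is reachable given the old bounds.
func jumpTableFact(old limitSketch, idx int64) (limitSketch, bool) {
	if idx < old.min || idx > old.max || uint64(idx) < old.umin || uint64(idx) > old.umax {
		return old, false // unsat: this outedge can never be taken
	}
	return limitSketch{idx, idx, uint64(idx), uint64(idx)}, true
}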
@ -1343,10 +1383,14 @@ func removeBranch(b *Block, branch branch) {
// attempt to preserve statement marker.
b.Pos = b.Pos.WithIsStmt()
}
b.Kind = BlockFirst
b.ResetControls()
if branch == positive {
b.swapSuccessors()
if branch == positive || branch == negative {
b.Kind = BlockFirst
b.ResetControls()
if branch == positive {
b.swapSuccessors()
}
} else {
// TODO: figure out how to remove an entry from a jump table
}
}

View file

@ -5,6 +5,7 @@
package ssa
import (
"cmd/compile/internal/base"
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/obj"
@ -962,8 +963,9 @@ found:
// clobber invalidates values. Returns true.
// clobber is used by rewrite rules to:
//
//	A) make sure the values are really dead and never used again.
//	B) decrement use counts of the values' args.
func clobber(vv ...*Value) bool {
for _, v := range vv {
v.reset(OpInvalid)
@ -985,7 +987,9 @@ func clobberIfDead(v *Value) bool {
// noteRule is an easy way to track if a rule is matched when writing
// new ones. Make the rule of interest also conditional on
//
//	noteRule("note to self: rule of interest matched")
//
// and that message will print when the rule matches.
func noteRule(s string) bool {
fmt.Println(s)
@ -1789,9 +1793,11 @@ func sequentialAddresses(x, y *Value, n int64) bool {
// We happen to match the semantics to those of arm/arm64.
// Note that these semantics differ from x86: the carry flag has the opposite
// sense on a subtraction!
//
// On amd64, C=1 represents a borrow, e.g. SBB on amd64 does x - y - C.
// On arm64, C=0 represents a borrow, e.g. SBC on arm64 does x - y - ^C.
// (because it does x + ^y + C).
//
// See https://en.wikipedia.org/wiki/Carry_flag#Vs._borrow_flag
type flagConstant uint8
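The two conventions can be sanity-checked with plain wrapping arithmetic (a standalone sketch, not compiler code):

package main

import "fmt"

// sbbAMD64 models amd64 SBB: x - y - C, where C=1 means a borrow is pending.
func sbbAMD64(x, y, c uint8) uint8 { return x - y - c }

// sbcARM64 models arm64 SBC: x + ^y + C, where C=0 means a borrow is pending.
func sbcARM64(x, y, c uint8) uint8 { return x + ^y + c }

func main() {
	fmt.Println(sbbAMD64(5, 3, 0), sbcARM64(5, 3, 1)) // no borrow pending: 2 2
	fmt.Println(sbbAMD64(5, 3, 1), sbcARM64(5, 3, 0)) // borrow pending: 1 1
}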
@ -1949,3 +1955,9 @@ func logicFlags32(x int32) flagConstant {
fcb.N = x < 0
return fcb.encode()
}
func makeJumpTableSym(b *Block) *obj.LSym {
s := base.Ctxt.Lookup(fmt.Sprintf("%s.jump%d", b.Func.fe.LSym(), b.ID))
s.Set(obj.AttrDuplicateOK, true)
return s
}

File diff suppressed because it is too large

View file

@ -68,8 +68,10 @@ func TestCondRewrite(t *testing.T) {
}
// Profile the aforementioned optimization from two angles:
//
//	SoloJump: generated branching code has one 'jump', for '<' and '>='
//	CombJump: generated branching code has two consecutive 'jump', for '<=' and '>'
//
// We expect that 'CombJump' is generally on par with the non-optimized code, and
// 'SoloJump' demonstrates some improvement.
// It's for arm64 initially; please see https://github.com/golang/go/issues/38740

View file

@ -519,6 +519,38 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
}
break
}
// match: (Add16 x (Neg16 y))
// result: (Sub16 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpNeg16 {
continue
}
y := v_1.Args[0]
v.reset(OpSub16)
v.AddArg2(x, y)
return true
}
break
}
// match: (Add16 (Com16 x) x)
// result: (Const16 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom16 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(-1)
return true
}
break
}
// match: (Add16 (Const16 [1]) (Com16 x))
// result: (Neg16 x)
for {
@ -764,6 +796,38 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
}
break
}
// match: (Add32 x (Neg32 y))
// result: (Sub32 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpNeg32 {
continue
}
y := v_1.Args[0]
v.reset(OpSub32)
v.AddArg2(x, y)
return true
}
break
}
// match: (Add32 (Com32 x) x)
// result: (Const32 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom32 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(-1)
return true
}
break
}
// match: (Add32 (Const32 [1]) (Com32 x))
// result: (Neg32 x)
for {
@ -1036,6 +1100,38 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
}
break
}
// match: (Add64 x (Neg64 y))
// result: (Sub64 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpNeg64 {
continue
}
y := v_1.Args[0]
v.reset(OpSub64)
v.AddArg2(x, y)
return true
}
break
}
// match: (Add64 (Com64 x) x)
// result: (Const64 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom64 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(-1)
return true
}
break
}
// match: (Add64 (Const64 [1]) (Com64 x))
// result: (Neg64 x)
for {
@ -1308,6 +1404,38 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
}
break
}
// match: (Add8 x (Neg8 y))
// result: (Sub8 x y)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpNeg8 {
continue
}
y := v_1.Args[0]
v.reset(OpSub8)
v.AddArg2(x, y)
return true
}
break
}
// match: (Add8 (Com8 x) x)
// result: (Const8 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom8 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(-1)
return true
}
break
}
// match: (Add8 (Const8 [1]) (Com8 x))
// result: (Neg8 x)
for {
@ -1630,6 +1758,23 @@ func rewriteValuegeneric_OpAnd16(v *Value) bool {
}
break
}
// match: (And16 (Com16 x) x)
// result: (Const16 [0])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom16 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(0)
return true
}
break
}
// match: (And16 x (And16 x y))
// result: (And16 x y)
for {
@ -1828,6 +1973,23 @@ func rewriteValuegeneric_OpAnd32(v *Value) bool {
}
break
}
// match: (And32 (Com32 x) x)
// result: (Const32 [0])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom32 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(0)
return true
}
break
}
// match: (And32 x (And32 x y))
// result: (And32 x y)
for {
@ -2026,6 +2188,23 @@ func rewriteValuegeneric_OpAnd64(v *Value) bool {
}
break
}
// match: (And64 (Com64 x) x)
// result: (Const64 [0])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom64 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(0)
return true
}
break
}
// match: (And64 x (And64 x y))
// result: (And64 x y)
for {
@ -2224,6 +2403,23 @@ func rewriteValuegeneric_OpAnd8(v *Value) bool {
}
break
}
// match: (And8 (Com8 x) x)
// result: (Const8 [0])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom8 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(0)
return true
}
break
}
// match: (And8 x (And8 x y))
// result: (And8 x y)
for {
@ -16964,6 +17160,23 @@ func rewriteValuegeneric_OpOr16(v *Value) bool {
}
break
}
// match: (Or16 (Com16 x) x)
// result: (Const16 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom16 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(-1)
return true
}
break
}
// match: (Or16 x (Or16 x y))
// result: (Or16 x y)
for {
@ -17142,6 +17355,23 @@ func rewriteValuegeneric_OpOr32(v *Value) bool {
}
break
}
// match: (Or32 (Com32 x) x)
// result: (Const32 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom32 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(-1)
return true
}
break
}
// match: (Or32 x (Or32 x y))
// result: (Or32 x y)
for {
@ -17320,6 +17550,23 @@ func rewriteValuegeneric_OpOr64(v *Value) bool {
}
break
}
// match: (Or64 (Com64 x) x)
// result: (Const64 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom64 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(-1)
return true
}
break
}
// match: (Or64 x (Or64 x y))
// result: (Or64 x y)
for {
@ -17498,6 +17745,23 @@ func rewriteValuegeneric_OpOr8(v *Value) bool {
}
break
}
// match: (Or8 (Com8 x) x)
// result: (Const8 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom8 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(-1)
return true
}
break
}
// match: (Or8 x (Or8 x y))
// result: (Or8 x y)
for {
@ -22994,6 +23258,34 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
v.AuxInt = int16ToAuxInt(0)
return true
}
// match: (Sub16 (Neg16 x) (Com16 x))
// result: (Const16 [1])
for {
if v_0.Op != OpNeg16 {
break
}
x := v_0.Args[0]
if v_1.Op != OpCom16 || x != v_1.Args[0] {
break
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(1)
return true
}
// match: (Sub16 (Com16 x) (Neg16 x))
// result: (Const16 [-1])
for {
if v_0.Op != OpCom16 {
break
}
x := v_0.Args[0]
if v_1.Op != OpNeg16 || x != v_1.Args[0] {
break
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(-1)
return true
}
// match: (Sub16 (Add16 x y) x)
// result: y
for {
@ -23309,6 +23601,34 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
v.AuxInt = int32ToAuxInt(0)
return true
}
// match: (Sub32 (Neg32 x) (Com32 x))
// result: (Const32 [1])
for {
if v_0.Op != OpNeg32 {
break
}
x := v_0.Args[0]
if v_1.Op != OpCom32 || x != v_1.Args[0] {
break
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(1)
return true
}
// match: (Sub32 (Com32 x) (Neg32 x))
// result: (Const32 [-1])
for {
if v_0.Op != OpCom32 {
break
}
x := v_0.Args[0]
if v_1.Op != OpNeg32 || x != v_1.Args[0] {
break
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(-1)
return true
}
// match: (Sub32 (Add32 x y) x)
// result: y
for {
@ -23648,6 +23968,34 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
v.AuxInt = int64ToAuxInt(0)
return true
}
// match: (Sub64 (Neg64 x) (Com64 x))
// result: (Const64 [1])
for {
if v_0.Op != OpNeg64 {
break
}
x := v_0.Args[0]
if v_1.Op != OpCom64 || x != v_1.Args[0] {
break
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(1)
return true
}
// match: (Sub64 (Com64 x) (Neg64 x))
// result: (Const64 [-1])
for {
if v_0.Op != OpCom64 {
break
}
x := v_0.Args[0]
if v_1.Op != OpNeg64 || x != v_1.Args[0] {
break
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(-1)
return true
}
// match: (Sub64 (Add64 x y) x)
// result: y
for {
@ -23987,6 +24335,34 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
v.AuxInt = int8ToAuxInt(0)
return true
}
// match: (Sub8 (Neg8 x) (Com8 x))
// result: (Const8 [1])
for {
if v_0.Op != OpNeg8 {
break
}
x := v_0.Args[0]
if v_1.Op != OpCom8 || x != v_1.Args[0] {
break
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(1)
return true
}
// match: (Sub8 (Com8 x) (Neg8 x))
// result: (Const8 [-1])
for {
if v_0.Op != OpCom8 {
break
}
x := v_0.Args[0]
if v_1.Op != OpNeg8 || x != v_1.Args[0] {
break
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(-1)
return true
}
// match: (Sub8 (Add8 x y) x)
// result: y
for {
@ -24714,6 +25090,37 @@ func rewriteValuegeneric_OpXor16(v *Value) bool {
}
break
}
// match: (Xor16 (Com16 x) x)
// result: (Const16 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom16 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst16)
v.AuxInt = int16ToAuxInt(-1)
return true
}
break
}
// match: (Xor16 (Const16 [-1]) x)
// result: (Com16 x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst16 || auxIntToInt16(v_0.AuxInt) != -1 {
continue
}
x := v_1
v.reset(OpCom16)
v.AddArg(x)
return true
}
break
}
// match: (Xor16 x (Xor16 x y))
// result: y
for {
@ -24845,6 +25252,37 @@ func rewriteValuegeneric_OpXor32(v *Value) bool {
}
break
}
// match: (Xor32 (Com32 x) x)
// result: (Const32 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom32 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst32)
v.AuxInt = int32ToAuxInt(-1)
return true
}
break
}
// match: (Xor32 (Const32 [-1]) x)
// result: (Com32 x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst32 || auxIntToInt32(v_0.AuxInt) != -1 {
continue
}
x := v_1
v.reset(OpCom32)
v.AddArg(x)
return true
}
break
}
// match: (Xor32 x (Xor32 x y))
// result: y
for {
@ -24976,6 +25414,37 @@ func rewriteValuegeneric_OpXor64(v *Value) bool {
}
break
}
// match: (Xor64 (Com64 x) x)
// result: (Const64 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom64 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst64)
v.AuxInt = int64ToAuxInt(-1)
return true
}
break
}
// match: (Xor64 (Const64 [-1]) x)
// result: (Com64 x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst64 || auxIntToInt64(v_0.AuxInt) != -1 {
continue
}
x := v_1
v.reset(OpCom64)
v.AddArg(x)
return true
}
break
}
// match: (Xor64 x (Xor64 x y))
// result: y
for {
@ -25107,6 +25576,37 @@ func rewriteValuegeneric_OpXor8(v *Value) bool {
}
break
}
// match: (Xor8 (Com8 x) x)
// result: (Const8 [-1])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpCom8 {
continue
}
x := v_0.Args[0]
if x != v_1 {
continue
}
v.reset(OpConst8)
v.AuxInt = int8ToAuxInt(-1)
return true
}
break
}
// match: (Xor8 (Const8 [-1]) x)
// result: (Com8 x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpConst8 || auxIntToInt8(v_0.AuxInt) != -1 {
continue
}
x := v_1
v.reset(OpCom8)
v.AddArg(x)
return true
}
break
}
// match: (Xor8 x (Xor8 x y))
// result: y
for {

View file

@ -338,13 +338,15 @@ func schedule(f *Func) {
// if v transitively depends on store s, v is ordered after s,
// otherwise v is ordered before s.
// Specifically, values are ordered like
//
//	store1
//	NilCheck that depends on store1
//	other values that depend on store1
//	store2
//	NilCheck that depends on store2
//	other values that depend on store2
//	...
//
// The order of non-store and non-NilCheck values is undefined
// (not necessarily dependency order). This should be cheaper
// than a full scheduling as done above.

Some files were not shown because too many files have changed in this diff