[dev.boringcrypto] all: merge master into dev.boringcrypto

Change-Id: Idd59c37d2fd759b0f73d2ee01b30f72ef4e9aee8
Dmitri Shuralyov 2020-05-06 00:20:47 -04:00
commit a9d2e3abf7
995 changed files with 118107 additions and 139260 deletions

View file

@ -96,6 +96,7 @@ Alexei Sholik <alcosholik@gmail.com>
Alexey Borzenkov <snaury@gmail.com>
Alexey Neganov <neganovalexey@gmail.com>
Alexey Palazhchenko <alexey.palazhchenko@gmail.com>
Alexey Semenyuk <alexsemenyuk88@gmail.com>
Alexis Hildebrandt <surryhill@gmail.com>
Ali Rizvi-Santiago <arizvisa@gmail.com>
Aliaksandr Valialkin <valyala@gmail.com>
@ -144,6 +145,7 @@ Andy Davis <andy@bigandian.com>
Andy Finkenstadt <afinkenstadt@zynga.com>
Andy Lindeman <andy@lindeman.io>
Andy Maloney <asmaloney@gmail.com>
Andy Pan <panjf2000@gmail.com>
Andy Walker <walkeraj@gmail.com>
Anfernee Yongkun Gui <anfernee.gui@gmail.com>
Angelo Bulfone <mbulfone@gmail.com>
@ -1143,6 +1145,7 @@ Pontus Leitzler <leitzler@gmail.com>
Prashant Varanasi <prashant@prashantv.com>
Pravendra Singh <hackpravj@gmail.com>
Preetam Jinka <pj@preet.am>
Qais Patankar <qaisjp@gmail.com>
Qiuxuan Zhu <ilsh1022@gmail.com>
Qualcomm Data Center, Inc.
Quan Tran <qeed.quan@gmail.com>
@ -1308,6 +1311,7 @@ Szabolcs Nagy <nsz@port70.net>
Taavi Kivisik <taavi.kivisik@gmail.com>
Tad Fisher <tadfisher@gmail.com>
Tad Glines <tad.glines@gmail.com>
Tailscale Inc.
Taj Khattra <taj.khattra@gmail.com>
Takayoshi Nishida <takayoshi.nishida@gmail.com>
Takeshi YAMANASHI <9.nashi@gmail.com>

View file

@ -145,6 +145,7 @@ Alexey Borzenkov <snaury@gmail.com>
Alexey Naidonov <alexey.naidyonov@gmail.com>
Alexey Neganov <neganovalexey@gmail.com>
Alexey Palazhchenko <alexey.palazhchenko@gmail.com>
Alexey Semenyuk <alexsemenyuk88@gmail.com>
Alexis Hildebrandt <surryhill@gmail.com>
Alexis Hunt <lexer@google.com>
Alexis Imperial-Legrand <ail@google.com>
@ -216,6 +217,7 @@ Andy Davis <andy@bigandian.com>
Andy Finkenstadt <afinkenstadt@zynga.com>
Andy Lindeman <andy@lindeman.io>
Andy Maloney <asmaloney@gmail.com>
Andy Pan <panjf2000@gmail.com>
Andy Walker <walkeraj@gmail.com>
Andzej Maciusovic <andzej.maciusovic@gmail.com>
Anfernee Yongkun Gui <anfernee.gui@gmail.com>

View file

@ -8,10 +8,6 @@ pkg os (linux-arm), const O_SYNC = 4096
pkg os (linux-arm-cgo), const O_SYNC = 4096
pkg os (linux-arm), const O_SYNC = 1052672
pkg os (linux-arm-cgo), const O_SYNC = 1052672
pkg syscall (darwin-386), const ImplementsGetwd = false
pkg syscall (darwin-386), func Fchflags(string, int) error
pkg syscall (darwin-386-cgo), const ImplementsGetwd = false
pkg syscall (darwin-386-cgo), func Fchflags(string, int) error
pkg syscall (darwin-amd64), const ImplementsGetwd = false
pkg syscall (darwin-amd64), func Fchflags(string, int) error
pkg syscall (darwin-amd64-cgo), const ImplementsGetwd = false

View file

@ -437,6 +437,31 @@ This is a wrapper function and should not count as disabling <code>recover</code
(For <code>TEXT</code> items.)
This function is a closure so it uses its incoming context register.
</li>
<li>
<code>LOCAL</code> = 128
<br>
This symbol is local to the dynamic shared object.
</li>
<li>
<code>TLSBSS</code> = 256
<br>
(For <code>DATA</code> and <code>GLOBL</code> items.)
Put this data in thread local storage.
</li>
<li>
<code>NOFRAME</code> = 512
<br>
(For <code>TEXT</code> items.)
Do not insert instructions to allocate a stack frame and save/restore the return
address, even if this is not a leaf function.
Only valid on functions that declare a frame size of 0.
</li>
<li>
<code>TOPFRAME</code> = 2048
<br>
(For <code>TEXT</code> items.)
Function is the top of the call stack. Traceback should stop at this function.
</li>
</ul>
<h3 id="runtime">Runtime Coordination</h3>

View file

@ -1,133 +0,0 @@
<!--{
"Title": "The Go Project",
"Path": "/project/"
}-->
<img class="gopher" src="/doc/gopher/project.png" />
<div id="manual-nav"></div>
<p>
Go is an open source project developed by a team at
<a href="//google.com/">Google</a> and many
<a href="/CONTRIBUTORS">contributors</a> from the open source community.
</p>
<p>
Go is distributed under a <a href="/LICENSE">BSD-style license</a>.
</p>
<h3 id="announce"><a href="//groups.google.com/group/golang-announce">Announcements Mailing List</a></h3>
<p>
A low traffic mailing list for important announcements, such as new releases.
</p>
<p>
We encourage all Go users to subscribe to
<a href="//groups.google.com/group/golang-announce">golang-announce</a>.
</p>
<h2 id="go1">Version history</h2>
<h3 id="release"><a href="/doc/devel/release.html">Release History</a></h3>
<p>A <a href="/doc/devel/release.html">summary</a> of the changes between Go releases. Notes for the major releases:</p>
<ul>
<li><a href="/doc/go1.14">Go 1.14</a> <small>(February 2020)</small></li>
<li><a href="/doc/go1.13">Go 1.13</a> <small>(September 2019)</small></li>
<li><a href="/doc/go1.12">Go 1.12</a> <small>(February 2019)</small></li>
<li><a href="/doc/go1.11">Go 1.11</a> <small>(August 2018)</small></li>
<li><a href="/doc/go1.10">Go 1.10</a> <small>(February 2018)</small></li>
<li><a href="/doc/go1.9">Go 1.9</a> <small>(August 2017)</small></li>
<li><a href="/doc/go1.8">Go 1.8</a> <small>(February 2017)</small></li>
<li><a href="/doc/go1.7">Go 1.7</a> <small>(August 2016)</small></li>
<li><a href="/doc/go1.6">Go 1.6</a> <small>(February 2016)</small></li>
<li><a href="/doc/go1.5">Go 1.5</a> <small>(August 2015)</small></li>
<li><a href="/doc/go1.4">Go 1.4</a> <small>(December 2014)</small></li>
<li><a href="/doc/go1.3">Go 1.3</a> <small>(June 2014)</small></li>
<li><a href="/doc/go1.2">Go 1.2</a> <small>(December 2013)</small></li>
<li><a href="/doc/go1.1">Go 1.1</a> <small>(May 2013)</small></li>
<li><a href="/doc/go1">Go 1</a> <small>(March 2012)</small></li>
</ul>
<h3 id="go1compat"><a href="/doc/go1compat">Go 1 and the Future of Go Programs</a></h3>
<p>
What Go 1 defines and the backwards-compatibility guarantees one can expect as
Go 1 matures.
</p>
<h2 id="resources">Developer Resources</h2>
<h3 id="source"><a href="https://golang.org/change">Source Code</a></h3>
<p>Check out the Go source code.</p>
<h3 id="discuss"><a href="//groups.google.com/group/golang-nuts">Discussion Mailing List</a></h3>
<p>
A mailing list for general discussion of Go programming.
</p>
<p>
Questions about using Go or announcements relevant to other Go users should be sent to
<a href="//groups.google.com/group/golang-nuts">golang-nuts</a>.
</p>
<h3 id="golang-dev"><a href="https://groups.google.com/group/golang-dev">Developer</a> and
<a href="https://groups.google.com/group/golang-codereviews">Code Review Mailing List</a></h3>
<p>The <a href="https://groups.google.com/group/golang-dev">golang-dev</a>
mailing list is for discussing code changes to the Go project.
The <a href="https://groups.google.com/group/golang-codereviews">golang-codereviews</a>
mailing list is for actual reviewing of the code changes (CLs).</p>
<h3 id="golang-checkins"><a href="https://groups.google.com/group/golang-checkins">Checkins Mailing List</a></h3>
<p>A mailing list that receives a message summarizing each checkin to the Go repository.</p>
<h3 id="build_status"><a href="//build.golang.org/">Build Status</a></h3>
<p>View the status of Go builds across the supported operating
systems and architectures.</p>
<h2 id="howto">How you can help</h2>
<h3><a href="//golang.org/issue">Reporting issues</a></h3>
<p>
If you spot bugs, mistakes, or inconsistencies in the Go project's code or
documentation, please let us know by
<a href="//golang.org/issue/new">filing a ticket</a>
on our <a href="//golang.org/issue">issue tracker</a>.
(Of course, you should check it's not an existing issue before creating
a new one.)
</p>
<p>
We pride ourselves on being meticulous; no issue is too small.
</p>
<p>
Security-related issues should be reported to
<a href="mailto:security@golang.org">security@golang.org</a>.<br>
See the <a href="/security">security policy</a> for more details.
</p>
<p>
Community-related issues should be reported to
<a href="mailto:conduct@golang.org">conduct@golang.org</a>.<br>
See the <a href="/conduct">Code of Conduct</a> for more details.
</p>
<h3><a href="/doc/contribute.html">Contributing code &amp; documentation</a></h3>
<p>
Go is an open source project and we welcome contributions from the community.
</p>
<p>
To get started, read these <a href="/doc/contribute.html">contribution
guidelines</a> for information on design, testing, and our code review process.
</p>
<p>
Check <a href="//golang.org/issue">the tracker</a> for
open issues that interest you. Those labeled
<a href="https://github.com/golang/go/issues?q=is%3Aopen+is%3Aissue+label%3A%22help+wanted%22">help wanted</a>
are particularly in need of outside help.
</p>

View file

@ -18,13 +18,13 @@ Gccgo has native gdb support.
</p>
<p>
Note that
<a href="https://github.com/derekparker/delve">Delve</a> is a better
<a href="https://github.com/go-delve/delve">Delve</a> is a better
alternative to GDB when debugging Go programs built with the standard
toolchain. It understands the Go runtime, data structures, and
expressions better than GDB. Delve currently supports Linux, OSX,
and Windows on <code>amd64</code>.
For the most up-to-date list of supported platforms, please see
<a href="https://github.com/derekparker/delve/tree/master/Documentation/installation">
<a href="https://github.com/go-delve/delve/tree/master/Documentation/installation">
the Delve documentation</a>.
</p>
</i>

View file

@ -31,6 +31,24 @@ TODO
<h2 id="ports">Ports</h2>
<h3 id="darwin">Darwin</h3>
<p> <!-- golang.org/issue/37610, golang.org/issue/37611 -->
As <a href="/doc/go1.14#darwin">announced</a> in the Go 1.14 release
notes, Go 1.15 drops support for 32-bit binaries on macOS, iOS,
iPadOS, watchOS, and tvOS (the <code>darwin/386</code>
and <code>darwin/arm</code> ports). Go continues to support the
64-bit <code>darwin/amd64</code> and <code>darwin/arm64</code> ports.
</p>
<h3 id="windows">Windows</h3>
<p> <!-- CL 214397 and CL 230217 -->
Go 1.15 now generates Windows ASLR executables when the
<code>-buildmode=pie</code> <code>cmd/link</code> flag is provided. The
<code>go</code> command uses <code>-buildmode=pie</code> by default on Windows.
</p>
<p>
TODO
</p>
@ -78,15 +96,61 @@ TODO
directory of each individual test).
</p>
<h4 id="module-cache">Module cache</h4>
<p><!-- https://golang.org/cl/219538 -->
The location of the module cache may now be set with
the <code>GOMODCACHE</code> environment variable. The default value of
<code>GOMODCACHE</code> is <code>GOPATH[0]/pkg/mod</code>, the location of the
module cache before this change.
</p>
<p><!-- https://golang.org/cl/221157 -->
A workaround is now available for Windows "Access is denied" errors in
<code>go</code> commands that access the module cache, caused by external
programs concurrently scanning the file system (see
<a href="https://golang.org/issue/36568">issue #36568</a>). The workaround is
not enabled by default because it is not safe to use when Go versions lower
than 1.14.2 and 1.13.10 are running concurrently with the same module cache.
It can be enabled by explicitly setting the environment variable
<code>GODEBUG=modcacheunzipinplace=1</code>.
</p>
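As a rough illustration of the documented default, here is a sketch (not the go command's actual implementation; the helper name effectiveModCache is invented, and `go env GOMODCACHE` remains authoritative) that resolves the cache location the same way:

	package main

	import (
		"fmt"
		"os"
		"path/filepath"
	)

	// effectiveModCache mirrors the documented default: $GOMODCACHE if set,
	// otherwise GOPATH[0]/pkg/mod.
	func effectiveModCache() string {
		if dir := os.Getenv("GOMODCACHE"); dir != "" {
			return dir
		}
		gopath := os.Getenv("GOPATH")
		if gopath == "" {
			home, _ := os.UserHomeDir()
			gopath = filepath.Join(home, "go")
		}
		// GOPATH may be a list; the first element holds the module cache.
		return filepath.Join(filepath.SplitList(gopath)[0], "pkg", "mod")
	}

	func main() { fmt.Println(effectiveModCache()) }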
<h2 id="runtime">Runtime</h2>
<p>
TODO
</p>
<h2 id="compiler">Compiler</h2>
<p><!-- https://golang.org/cl/229578 -->
Package <code>unsafe</code>'s <a href="/pkg/unsafe/#Pointer">safety
rules</a> allow converting an <code>unsafe.Pointer</code>
into <code>uintptr</code> when calling certain
functions. Previously, in some cases, the compiler allowed multiple
chained conversions (for example, <code>syscall.Syscall(…,
uintptr(uintptr(ptr)), …)</code>). The compiler now requires exactly
one conversion. Code that used multiple conversions should be
updated to satisfy the safety rules.
</p>
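For example, a minimal sketch of the accepted form next to the now-rejected one (Linux-only, since it uses syscall.SYS_WRITE):

	package main

	import (
		"syscall"
		"unsafe"
	)

	func main() {
		msg := []byte("hi\n")
		// Accepted: exactly one unsafe.Pointer-to-uintptr conversion
		// in the call expression.
		syscall.Syscall(syscall.SYS_WRITE, 1,
			uintptr(unsafe.Pointer(&msg[0])), uintptr(len(msg)))

		// Rejected by the Go 1.15 compiler: a chained double conversion.
		// syscall.Syscall(syscall.SYS_WRITE, 1,
		//	uintptr(uintptr(unsafe.Pointer(&msg[0]))), uintptr(len(msg)))
	}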
<h2 id="library">Core library</h2>
<h3 id="time/tzdata">New embedded tzdata package</h3>
<p> <!-- CL 224588 -->
Go 1.15 includes a new package,
<a href="/pkg/time/tzdata/"><code>time/tzdata</code></a>,
that permits embedding the timezone database into a program.
Importing this package (as <code>import _ "time/tzdata"</code>)
permits the program to find timezone information even if the
timezone database is not available on the local system.
You can also embed the timezone database by building
with <code>-tags timetzdata</code>.
Either approach increases the size of the program by about 800 KB.
</p>
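A minimal sketch of the import-based approach:

	package main

	import (
		"fmt"
		"time"

		_ "time/tzdata" // fall back to the embedded timezone database
	)

	func main() {
		// Succeeds even on a system with no tzdata files installed.
		loc, err := time.LoadLocation("America/New_York")
		if err != nil {
			panic(err)
		}
		fmt.Println(time.Now().In(loc))
	}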
<p>
TODO
</p>
@ -119,6 +183,127 @@ TODO
TODO
</p>
<dl id="crypto/tls"><dt><a href="/crypto/tls/">crypto/tls</a></dt>
<dd>
<p><!-- CL 214977 -->
The new
<a href="/pkg/crypto/tls/#Dialer"><code>Dialer</code></a>
type and its
<a href="/pkg/crypto/tls/#Dialer.DialContext"><code>DialContext</code></a>
method permits using a context to both connect and handshake with a TLS server.
</p>
</dd>
</dl>
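A short sketch of the new API (example.com stands in for a real server):

	package main

	import (
		"context"
		"crypto/tls"
		"time"
	)

	func main() {
		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
		defer cancel()

		// The context bounds the TCP connect and the TLS handshake together.
		d := &tls.Dialer{}
		conn, err := d.DialContext(ctx, "tcp", "example.com:443")
		if err != nil {
			panic(err)
		}
		conn.Close()
	}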
<dl id="flag"><dt><a href="/pkg/flag/">flag</a></dt>
<dd>
<p><!-- CL 221427 -->
When the flag package sees <code>-h</code> or <code>-help</code>, and
those flags are not defined, the flag package prints a usage message.
If the <a href="/pkg/flag/#FlagSet"><code>FlagSet</code></a> was created with
<a href="/pkg/flag/#ExitOnError"><code>ExitOnError</code></a>,
<a href="/pkg/flag/#FlagSet.Parse"><code>FlagSet.Parse</code></a> would then
exit with a status of 2. In this release, the exit status for <code>-h</code>
or <code>-help</code> has been changed to 0. In particular, this applies to
the default handling of command line flags.
</p>
</dd>
</dl>
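For illustration, a program whose -h handling changes under the default ExitOnError behavior:

	package main

	import (
		"flag"
		"fmt"
	)

	func main() {
		n := flag.Int("n", 1, "number of iterations")
		// Running this program with -h or -help prints the usage message
		// and, as of Go 1.15, exits with status 0 rather than 2.
		flag.Parse()
		fmt.Println("n =", *n)
	}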
<dl id="net"><dt><a href="/pkg/net/">net</a></dt>
<dd>
<p><!-- CL 228645 -->
If an I/O operation exceeds a deadline set by
the <a href="/pkg/net/#Conn"><code>Conn.SetDeadline</code></a>,
<code>Conn.SetReadDeadline</code>,
or <code>Conn.SetWriteDeadline</code> methods, it will now
return an error that is or wraps
<a href="/pkg/os#ErrDeadlineExceeded"><code>os.ErrDeadlineExceeded</code></a>.
This may be used to reliably detect whether an error is due to
an exceeded deadline.
Earlier releases recommended calling the <code>Timeout</code>
method on the error, but I/O operations can return errors for
which <code>Timeout</code> returns <code>true</code> although a
deadline has not been exceeded.
</p>
<p><!-- CL 228641 -->
The new <a href="/pkg/net/#Resolver.LookupIP"><code>Resolver.LookupIP</code></a>
method supports IP lookups that are both network-specific and accept a context.
</p>
</dd>
</dl>
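A sketch of the new error check (example.com is a placeholder host):

	package main

	import (
		"errors"
		"fmt"
		"net"
		"os"
		"time"
	)

	func main() {
		conn, err := net.Dial("tcp", "example.com:80")
		if err != nil {
			panic(err)
		}
		defer conn.Close()

		conn.SetReadDeadline(time.Now().Add(50 * time.Millisecond))
		buf := make([]byte, 1)
		if _, err := conn.Read(buf); errors.Is(err, os.ErrDeadlineExceeded) {
			fmt.Println("read hit the deadline")
		}
	}

The context-aware, network-specific lookup reads as <code>net.DefaultResolver.LookupIP(ctx, "ip4", "example.com")</code>, and the same errors.Is check applies to the os.File deadline errors described below.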
<dl id="net/http/httputil"><dt><a href="/pkg/net/http/httputil/">net/http/httputil</a></dt>
<dd>
<p><!-- CL 230937 -->
<a href="/pkg/net/http/httputil/#ReverseProxy"><code>ReverseProxy</code></a>
now supports not modifying the <code>X-Forwarded-For</code>
header when the incoming <code>Request.Header</code> map entry
for that field is <code>nil</code>.
</p>
</dd>
</dl>
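A sketch of opting out of the X-Forwarded-For rewrite (the backend and listen addresses are placeholders):

	package main

	import (
		"net/http"
		"net/http/httputil"
		"net/url"
	)

	func main() {
		target, _ := url.Parse("http://127.0.0.1:8080") // placeholder backend
		proxy := httputil.NewSingleHostReverseProxy(target)

		director := proxy.Director
		proxy.Director = func(req *http.Request) {
			director(req)
			// A nil map entry tells ReverseProxy not to add or append
			// an X-Forwarded-For header at all.
			req.Header["X-Forwarded-For"] = nil
		}
		http.ListenAndServe(":8000", proxy)
	}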
<dl id="net/http/pprof"><dt><a href="/pkg/net/http/pprof/">net/http/pprof</a></dt>
<dd>
<p><!-- CL 147598, CL 229537 -->
All profile endpoints now support a "<code>seconds</code>" parameter. When present,
the endpoint profiles for the specified number of seconds and reports the difference.
The meaning of the "<code>seconds</code>" parameter in the <code>cpu</code> profile and
the trace endpoints is unchanged.
</p>
</dd>
</dl>
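For instance, assuming a server that imports net/http/pprof and listens on localhost:6060 (a placeholder address), a delta mutex profile can be fetched like this:

	package main

	import (
		"io"
		"net/http"
		"os"
	)

	func main() {
		// Collects the mutex profile, waits 5 seconds, collects it again,
		// and reports the difference between the two.
		resp, err := http.Get("http://localhost:6060/debug/pprof/mutex?seconds=5")
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()
		io.Copy(os.Stdout, resp.Body)
	}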
<dl id="net/url"><dt><a href="/pkg/net/url/">net/url</a></dt>
<dd>
<p><!-- CL 227645 -->
The new <a href="/pkg/net/url/#URL"><code>URL</code></a> field
<code>RawFragment</code> and method <a href="/pkg/net/url/#URL.EscapedFragment"><code>EscapedFragment</code></a>
provide detail about and control over the exact encoding of a particular fragment.
These are analogous to
<code>RawPath</code> and <a href="/pkg/net/url/#URL.EscapedPath"><code>EscapedPath</code></a>.
</p>
<p><!-- CL 207082 -->
The new <a href="/pkg/net/url/#URL"><code>URL</code></a>
method <a href="/pkg/net/url/#URL.Redacted"><code>Redacted</code></a>
returns the URL in string form with any password replaced with <code>xxxxx</code>.
</p>
</dd>
</dl>
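A small sketch of both additions:

	package main

	import (
		"fmt"
		"net/url"
	)

	func main() {
		u, err := url.Parse("https://user:secret@example.com/api#a%2Fb")
		if err != nil {
			panic(err)
		}
		fmt.Println(u.Redacted())        // https://user:xxxxx@example.com/api#a%2Fb
		fmt.Println(u.Fragment)          // a/b (decoded)
		fmt.Println(u.EscapedFragment()) // a%2Fb (preserved via RawFragment)
	}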
<dl id="os"><dt><a href="/pkg/os/">os</a></dt>
<dd>
<p><!-- CL -->
If an I/O operation exceeds a deadline set by
the <a href="/pkg/os/#File.SetDeadline"><code>File.SetDeadline</code></a>,
<a href="/pkg/os/#File.SetReadDeadline"><code>File.SetReadDeadline</code></a>,
or <a href="/pkg/os/#File.SetWriteDeadline"><code>File.SetWriteDeadline</code></a>
methods, it will now return an error that is or wraps
<a href="/pkg/os#ErrDeadlineExceeded"><code>os.ErrDeadlineExceeded</code></a>.
This may be used to reliably detect whether an error is due to
an exceeded deadline.
Earlier releases recommended calling the <code>Timeout</code>
method on the error, but I/O operations can return errors for
which <code>Timeout</code> returns <code>true</code> although a
deadline has not been exceeded.
</p>
</dd>
</dl>
<dl id="reflect"><dt><a href="/pkg/reflect/">reflect</a></dt>
<dd>
<p><!-- CL 228902 -->
Package reflect now disallows accessing methods of all
non-exported fields, whereas previously it allowed accessing
those of non-exported, embedded fields. Code that relies on the
previous behavior should be updated to instead access the
corresponding promoted method of the enclosing variable.
</p>
</dd>
</dl>
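A sketch of the migration (the type names are invented for illustration):

	package main

	import (
		"fmt"
		"reflect"
	)

	type base struct{}

	func (base) Name() string { return "base" }

	type Wrapper struct{ base } // non-exported embedded field

	func main() {
		v := reflect.ValueOf(Wrapper{})
		// As of Go 1.15, v.Field(0).Method(0) panics: base is non-exported.
		// Use the promoted method on the enclosing value instead:
		out := v.MethodByName("Name").Call(nil)
		fmt.Println(out[0]) // base
	}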
<dl id="pkg-runtime"><dt><a href="/pkg/runtime/">runtime</a></dt>
<dd>
<p><!-- CL 221779 -->
@ -128,28 +313,83 @@ TODO
<code>uint</code>, <code>uint8</code>, <code>uint16</code>, <code>uint32</code>, <code>uint64</code>, <code>uintptr</code>,
then the value will be printed, instead of just its address.
</p>
<p><!-- CL -->
On a Unix system, if the <code>kill</code> command
or <code>kill</code> system call is used to send
a <code>SIGSEGV</code>, <code>SIGBUS</code>,
or <code>SIGFPE</code> signal to a Go program, and if the signal
is not being handled via
<a href="/pkg/os/signal/#Notify"><code>os/signal.Notify</code></a>,
the Go program will now reliably crash with a stack trace.
In earlier releases the behavior was unpredictable.
</p>
</dd>
</dl>
<dl id="pkg-runtime-pprof"><dt><a href="/pkg/runtime/pprof">runtime/pprof</a></dt>
<dd>
<p><!-- CL 189318 -->
The goroutine profile includes the profile labels associated with each goroutine
at the time of profiling. This feature is not yet implemented for the profile
reported with <code>debug=2</code>.
</p>
</dd>
</dl>
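A sketch of attaching labels that then show up in the goroutine profile (the label key and value are arbitrary):

	package main

	import (
		"context"
		"os"
		"runtime/pprof"
	)

	func main() {
		done := make(chan struct{})
		// Goroutines running under Do carry these labels, and Go 1.15
		// includes them in the goroutine profile.
		pprof.Do(context.Background(), pprof.Labels("worker", "indexer"), func(ctx context.Context) {
			go func() { <-done }() // labeled goroutine
		})
		pprof.Lookup("goroutine").WriteTo(os.Stdout, 1)
		close(done)
	}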
<dl id="sync"><dt><a href="/pkg/sync/">sync</a></dt>
<dd>
<p><!-- golang.org/issue/33762 -->
<p><!-- CL 205899, golang.org/issue/33762 -->
The new method
<a href="/pkg/sync#Map.LoadAndDelete"><code>Map.LoadAndDelete</code></a>
<a href="/pkg/sync/#Map.LoadAndDelete"><code>Map.LoadAndDelete</code></a>
atomically deletes a key and returns the previous value if present.
</p>
<p><!-- CL 205899 -->
The method
<a href="/pkg/sync#Map.Delete"><code>Map.Delete</code></a>
<a href="/pkg/sync/#Map.Delete"><code>Map.Delete</code></a>
is more efficient.
</p>
</dl><!-- sync -->
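A minimal sketch:

	package main

	import (
		"fmt"
		"sync"
	)

	func main() {
		var m sync.Map
		m.Store("job-1", "queued")

		// Atomically removes the key and reports the old value,
		// avoiding a racy Load followed by Delete.
		if v, loaded := m.LoadAndDelete("job-1"); loaded {
			fmt.Println("removed:", v)
		}
	}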
<dl id="syscall"><dt><a href="/pkg/syscall/">syscall</a></dt>
<dd>
<p><!-- CL 231638 -->
On Unix systems, functions that use
<a href="/pkg/syscall/#SysProcAttr"><code>SysProcAttr</code></a>
will now reject attempts to set both the <code>Setctty</code>
and <code>Foreground</code> fields, as they both use
the <code>Ctty</code> field but do so in incompatible ways.
We expect that few existing programs set both fields.
</p>
<p>
Setting the <code>Setctty</code> field now requires that the
<code>Ctty</code> field be set to a file descriptor number in the
child process, as determined by the <code>ProcAttr.Files</code> field.
Using a child descriptor always worked, but there were certain
cases where using a parent file descriptor also happened to work.
Some programs that set <code>Setctty</code> will need to change
the value of <code>Ctty</code> to use a child descriptor number.
</p>
</dd>
</dl>
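A Unix-only sketch of the now-required form (the tty path is a placeholder; note that Ctty is the child's descriptor number, here 0 because the tty is wired to cmd.Stdin):

	package main

	import (
		"os"
		"os/exec"
		"syscall"
	)

	func main() {
		tty, err := os.OpenFile("/dev/pts/3", os.O_RDWR, 0) // placeholder tty
		if err != nil {
			panic(err)
		}
		defer tty.Close()

		cmd := exec.Command("sh")
		cmd.Stdin, cmd.Stdout, cmd.Stderr = tty, tty, tty
		cmd.SysProcAttr = &syscall.SysProcAttr{
			Setsid:  true,
			Setctty: true,
			Ctty:    0, // a child descriptor number: fd 0 is cmd.Stdin
		}
		if err := cmd.Run(); err != nil {
			panic(err)
		}
	}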
<dl id="testing"><dt><a href="/pkg/testing/">testing</a></dt>
<dd>
<p><!-- CL 226877, golang.org/issue/35998 -->
The new methods
<a href="/pkg/testing/#T.TempDir"><code>T.TempDir</code></a> and
<a href="/pkg/testing/#B.TempDir"><code>B.TempDir</code></a>
return temporary directories that are automatically cleaned up
at the end of the test.
</p>
</dd>
</dl><!-- testing -->
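A minimal sketch, using io/ioutil (still current in Go 1.15):

	package example

	import (
		"io/ioutil"
		"path/filepath"
		"testing"
	)

	func TestWriteConfig(t *testing.T) {
		dir := t.TempDir() // removed automatically when the test finishes
		path := filepath.Join(dir, "config.json")
		if err := ioutil.WriteFile(path, []byte("{}"), 0644); err != nil {
			t.Fatal(err)
		}
	}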
<dl id="time"><dt><a href="/pkg/time/">time</a></dt>
<dd>
<p><!-- golang.org/issue/33184 -->
<p><!-- CL 220424, CL 217362, golang.org/issue/33184 -->
The new method
<a href="/pkg/time#Ticker.Reset"><code>Ticker.Reset</code></a>
<a href="/pkg/time/#Ticker.Reset"><code>Ticker.Reset</code></a>
supports changing the duration of a ticker.
</p>
</dd>
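A minimal sketch:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		t := time.NewTicker(100 * time.Millisecond)
		defer t.Stop()

		<-t.C
		// Switch the same ticker to a slower interval in place.
		t.Reset(500 * time.Millisecond)
		fmt.Println(<-t.C)
	}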

View file

@ -540,15 +540,9 @@ The valid combinations of <code>$GOOS</code> and <code>$GOARCH</code> are:
<td></td><td><code>android</code></td> <td><code>arm64</code></td>
</tr>
<tr>
<td></td><td><code>darwin</code></td> <td><code>386</code></td>
</tr>
<tr>
<td></td><td><code>darwin</code></td> <td><code>amd64</code></td>
</tr>
<tr>
<td></td><td><code>darwin</code></td> <td><code>arm</code></td>
</tr>
<tr>
<td></td><td><code>darwin</code></td> <td><code>arm64</code></td>
</tr>
<tr>

View file

@ -8,8 +8,8 @@
# Consult https://www.iana.org/time-zones for the latest versions.
# Versions to use.
CODE=2019c
CODE=2020a
DATA=2019c
DATA=2020a
set -e
rm -rf work
@ -28,6 +28,8 @@ rm -f ../../zoneinfo.zip
zip -0 -r ../../zoneinfo.zip *
cd ../..
go generate time/tzdata
echo
if [ "$1" = "-work" ]; then
echo Left workspace behind in work/.

Binary file not shown.

View file

@ -32,7 +32,7 @@ func TestCrossPackageTests(t *testing.T) {
t.Skip("Can't exec cmd/go subprocess on Android.")
case "darwin":
switch runtime.GOARCH {
case "arm", "arm64":
case "arm64":
t.Skip("Can't exec cmd/go subprocess on iOS.")
}
}

View file

@ -62,10 +62,8 @@ import (
func testSigaltstack(t *testing.T) {
switch {
case runtime.GOOS == "solaris", runtime.GOOS == "illumos", runtime.GOOS == "darwin" && (runtime.GOARCH == "arm" || runtime.GOARCH == "arm64"):
case runtime.GOOS == "solaris", runtime.GOOS == "illumos", runtime.GOOS == "darwin" && runtime.GOARCH == "arm64":
t.Skipf("switching signal stack not implemented on %s/%s", runtime.GOOS, runtime.GOARCH)
case runtime.GOOS == "darwin" && runtime.GOARCH == "386":
t.Skipf("sigaltstack fails on darwin/386")
}
C.changeSignalStack()

View file

@ -897,6 +897,10 @@ static uint16_t issue31093F(uint16_t v) { return v; }
// issue 32579
typedef struct S32579 { unsigned char data[1]; } S32579;
// issue 38649
// Test that #define'd type aliases work.
#define netbsd_gid unsigned int
*/
import "C"
@ -2192,3 +2196,7 @@ func test32579(t *testing.T) {
t.Errorf("&s[0].data[0] failed: got %d, want %d", s[0].data[0], 1)
}
}
// issue 38649
var issue38649 C.netbsd_gid = 42

View file

@ -124,6 +124,11 @@ typedef struct {
} Issue31891B;
void callIssue31891(void);
typedef struct {
int i;
} Issue38408, *PIssue38408;
*/
import "C"
@ -159,7 +164,7 @@ func Add(x int) {
}
func testCthread(t *testing.T) {
if runtime.GOOS == "darwin" && (runtime.GOARCH == "arm" || runtime.GOARCH == "arm64") {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
t.Skip("the iOS exec wrapper is unable to properly handle the panic from Add")
}
sum.i = 0
@ -552,3 +557,8 @@ func useIssue31891B(c *C.Issue31891B) {}
func test31891(t *testing.T) {
C.callIssue31891()
}
// issue 38408
// A typedef pointer can be used as the element type.
// No runtime test; just make sure it compiles.
var _ C.PIssue38408 = &C.Issue38408{i: 1}

View file

@ -134,7 +134,7 @@ func testMain(m *testing.M) int {
} else {
switch GOOS {
case "darwin":
if GOARCH == "arm" || GOARCH == "arm64" {
if GOARCH == "arm64" {
libbase += "_shared"
}
case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris", "illumos":
@ -305,7 +305,7 @@ func TestEarlySignalHandler(t *testing.T) {
switch GOOS {
case "darwin":
switch GOARCH {
case "arm", "arm64":
case "arm64":
t.Skipf("skipping on %s/%s; see https://golang.org/issue/13701", GOOS, GOARCH)
}
case "windows":
@ -487,7 +487,7 @@ func checkSignalForwardingTest(t *testing.T) {
switch GOOS {
case "darwin":
switch GOARCH {
case "arm", "arm64":
case "arm64":
t.Skipf("skipping on %s/%s; see https://golang.org/issue/13701", GOOS, GOARCH)
}
case "windows":
@ -603,7 +603,7 @@ func TestExtar(t *testing.T) {
if runtime.Compiler == "gccgo" {
t.Skip("skipping -extar test when using gccgo")
}
if runtime.GOOS == "darwin" && (runtime.GOARCH == "arm" || runtime.GOARCH == "arm64") {
if runtime.GOOS == "darwin" && runtime.GOARCH == "arm64" {
t.Skip("shell scripts are not executable on iOS hosts")
}

View file

@ -108,7 +108,7 @@ func testMain(m *testing.M) int {
libgodir := GOOS + "_" + GOARCH
switch GOOS {
case "darwin":
if GOARCH == "arm" || GOARCH == "arm64" {
if GOARCH == "arm64" {
libgodir += "_shared"
}
case "dragonfly", "freebsd", "linux", "netbsd", "openbsd", "solaris", "illumos":

View file

@ -0,0 +1,15 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// +build ignore
package main
/*
struct Issue38649 { int x; };
#define issue38649 struct Issue38649
*/
import "C"
type issue38649 C.issue38649

View file

@ -19,5 +19,8 @@ var v6 = B{}
// Test that S is fully defined
var v7 = S{}
// Test that #define'd type is fully defined
var _ = issue38649{X: 0}
func main() {
}

View file

@ -23,6 +23,7 @@ var filePrefixes = []string{
"fieldtypedef",
"issue37479",
"issue37621",
"issue38649",
}
func TestGoDefs(t *testing.T) {

View file

@ -20,7 +20,7 @@ import (
func requireTestSOSupported(t *testing.T) {
t.Helper()
switch runtime.GOARCH {
case "arm", "arm64":
case "arm64":
if runtime.GOOS == "darwin" {
t.Skip("No exec facility on iOS.")
}

View file

@ -20,7 +20,7 @@ import (
func requireTestSOSupported(t *testing.T) {
t.Helper()
switch runtime.GOARCH {
case "arm", "arm64":
case "arm64":
if runtime.GOOS == "darwin" {
t.Skip("No exec facility on iOS.")
}

View file

@ -8,9 +8,7 @@ export IPHONEOS_DEPLOYMENT_TARGET=5.1
# cmd/cgo doesn't support llvm-gcc-4.2, so we have to use clang.
CLANG=`xcrun --sdk $SDK --find clang`
if [ "$GOARCH" == "arm" ]; then
if [ "$GOARCH" == "arm64" ]; then
CLANGARCH="armv7"
elif [ "$GOARCH" == "arm64" ]; then
CLANGARCH="arm64"
else
echo "unknown GOARCH=$GOARCH" >&2

View file

@ -10,7 +10,7 @@ if(! test -f make.rc){
exit wrongdir
}
. ./make.rc --no-banner
. ./make.rc --no-banner $*
bind -b $GOROOT/bin /bin
./run.rc --no-rebuild
$GOTOOLDIR/dist banner # print build info

View file

@ -11,6 +11,7 @@ import (
"bytes"
"errors"
"io"
"strings"
"unicode/utf8"
)
@ -419,20 +420,16 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) {
return
}
// ReadBytes reads until the first occurrence of delim in the input,
// returning a slice containing the data up to and including the delimiter.
// If ReadBytes encounters an error before finding a delimiter,
// it returns the data read before the error and the error itself (often io.EOF).
// ReadBytes returns err != nil if and only if the returned data does not end in
// delim.
// For simple uses, a Scanner may be more convenient.
func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
// collectFragments reads until the first occurrence of delim in the input. It
// returns (slice of full buffers, remaining bytes before delim, total number
// of bytes in the combined first two elements, error).
// The complete result is equal to
// `bytes.Join(append(fullBuffers, finalFragment), nil)`, which has a
// length of `totalLen`. The result is structured in this way to allow callers
// to minimize allocations and copies.
func (b *Reader) collectFragments(delim byte) (fullBuffers [][]byte, finalFragment []byte, totalLen int, err error) {
// Use ReadSlice to look for array,
// accumulating full buffers.
var frag []byte
var full [][]byte
// Use ReadSlice to look for delim, accumulating full buffers.
var err error
n := 0
for {
var e error
frag, e = b.ReadSlice(delim)
@ -447,12 +444,23 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
// Make a copy of the buffer.
buf := make([]byte, len(frag))
copy(buf, frag)
full = append(full, buf)
fullBuffers = append(fullBuffers, buf)
n += len(buf)
totalLen += len(buf)
}
n += len(frag)
totalLen += len(frag)
return fullBuffers, frag, totalLen, err
}
// ReadBytes reads until the first occurrence of delim in the input,
// returning a slice containing the data up to and including the delimiter.
// If ReadBytes encounters an error before finding a delimiter,
// it returns the data read before the error and the error itself (often io.EOF).
// ReadBytes returns err != nil if and only if the returned data does not end in
// delim.
// For simple uses, a Scanner may be more convenient.
func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
full, frag, n, err := b.collectFragments(delim)
// Allocate new buffer to hold the full pieces and the fragment.
buf := make([]byte, n)
n = 0
@ -472,8 +480,16 @@ func (b *Reader) ReadBytes(delim byte) ([]byte, error) {
// delim.
// For simple uses, a Scanner may be more convenient.
func (b *Reader) ReadString(delim byte) (string, error) {
bytes, err := b.ReadBytes(delim)
full, frag, n, err := b.collectFragments(delim)
return string(bytes), err
// Allocate new buffer to hold the full pieces and the fragment.
var buf strings.Builder
buf.Grow(n)
// Copy full pieces and fragment in.
for _, fb := range full {
buf.Write(fb)
}
buf.Write(frag)
return buf.String(), err
}
// WriteTo implements io.WriterTo.

View file

@ -535,6 +535,23 @@ func TestReadWriteRune(t *testing.T) {
}
}
func TestReadStringAllocs(t *testing.T) {
r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n")
buf := NewReader(r)
allocs := testing.AllocsPerRun(100, func() {
r.Seek(0, io.SeekStart)
buf.Reset(r)
_, err := buf.ReadString('\n')
if err != nil {
t.Fatal(err)
}
})
if allocs != 1 {
t.Errorf("Unexpected number of allocations, got %f, want 1", allocs)
}
}
func TestWriter(t *testing.T) {
var data [8192]byte
@ -1644,6 +1661,21 @@ func BenchmarkReaderWriteToOptimal(b *testing.B) {
}
}
func BenchmarkReaderReadString(b *testing.B) {
r := strings.NewReader(" foo foo 42 42 42 42 42 42 42 42 4.2 4.2 4.2 4.2\n")
buf := NewReader(r)
b.ReportAllocs()
for i := 0; i < b.N; i++ {
r.Seek(0, io.SeekStart)
buf.Reset(r)
_, err := buf.ReadString('\n')
if err != nil {
b.Fatal(err)
}
}
}
func BenchmarkWriterCopyOptimal(b *testing.B) {
// Optimal case is where the underlying writer implements io.ReaderFrom
srcBuf := bytes.NewBuffer(make([]byte, 8192))

View file

@ -42,7 +42,7 @@ gettargets() {
}
selectedtargets() {
gettargets | egrep -v 'android-arm|darwin-arm' | egrep "$pattern"
gettargets | egrep -v 'android-arm|darwin-arm64' | egrep "$pattern"
}
# put linux first in the target list to get all the architectures up front.

View file

@ -445,8 +445,9 @@ func Fields(s []byte) [][]byte {
// It splits the slice s at each run of code points c satisfying f(c) and
// returns a slice of subslices of s. If all code points in s satisfy f(c), or
// len(s) == 0, an empty slice is returned.
// FieldsFunc makes no guarantees about the order in which it calls f(c).
// If f does not return consistent results for a given c, FieldsFunc may crash.
//
// FieldsFunc makes no guarantees about the order in which it calls f(c)
// and assumes that f always returns the same value for a given c.
func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
// A span is used to record a slice of s of the form s[start:end].
// The start index is inclusive and the end index is exclusive.
@ -457,8 +458,10 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
spans := make([]span, 0, 32)
// Find the field start and end indices.
wasField := false
fromIndex := 0
// Doing this in a separate pass (rather than slicing the string s
// and collecting the result substrings right away) is significantly
// more efficient, possibly due to cache effects.
start := -1 // valid span start if >= 0
for i := 0; i < len(s); {
size := 1
r := rune(s[i])
@ -466,22 +469,21 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
r, size = utf8.DecodeRune(s[i:])
}
if f(r) {
if wasField {
if start >= 0 {
spans = append(spans, span{start: fromIndex, end: i})
spans = append(spans, span{start, i})
wasField = false
start = -1
}
} else {
if !wasField {
if start < 0 {
fromIndex = i
start = i
wasField = true
}
}
i += size
}
// Last field might end at EOF.
if wasField {
if start >= 0 {
spans = append(spans, span{fromIndex, len(s)})
spans = append(spans, span{start, len(s)})
}
// Create subslices from recorded field indices.
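The rewritten FieldsFunc above records spans in one pass and slices afterwards, and its contract now assumes a pure f. A usage sketch:

	package main

	import (
		"bytes"
		"fmt"
		"unicode"
	)

	func main() {
		// f must be consistent: the implementation assumes (and no longer
		// tolerates otherwise) that f always returns the same value for a
		// given rune.
		f := func(r rune) bool {
			return !unicode.IsLetter(r) && !unicode.IsNumber(r)
		}
		fields := bytes.FieldsFunc([]byte("  foo1;bar2,baz3..."), f)
		fmt.Printf("%q\n", fields) // ["foo1" "bar2" "baz3"]
	}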

View file

@ -60,8 +60,6 @@ var contexts = []*build.Context{
{GOOS: "linux", GOARCH: "amd64"},
{GOOS: "linux", GOARCH: "arm", CgoEnabled: true},
{GOOS: "linux", GOARCH: "arm"},
{GOOS: "darwin", GOARCH: "386", CgoEnabled: true},
{GOOS: "darwin", GOARCH: "386"},
{GOOS: "darwin", GOARCH: "amd64", CgoEnabled: true},
{GOOS: "darwin", GOARCH: "amd64"},
{GOOS: "windows", GOARCH: "amd64"},
@ -252,6 +250,13 @@ func featureWithoutContext(f string) string {
return spaceParensRx.ReplaceAllString(f, "")
}
// portRemoved reports whether the given port-specific API feature is
// okay to no longer exist because its port was removed.
func portRemoved(feature string) bool {
return strings.Contains(feature, "(darwin-386)") ||
strings.Contains(feature, "(darwin-386-cgo)")
}
func compareAPI(w io.Writer, features, required, optional, exception []string, allowAdd bool) (ok bool) {
ok = true
@ -279,6 +284,8 @@ func compareAPI(w io.Writer, features, required, optional, exception []string, a
// acknowledged by being in the file
// "api/except.txt". No need to print them out
// here.
} else if portRemoved(feature) {
// okay.
} else if featureSet[featureWithoutContext(feature)] {
// okay.
} else {
@ -437,6 +444,11 @@ type listImports struct {
var listCache sync.Map // map[string]listImports, keyed by contextName
// listSem is a semaphore restricting concurrent invocations of 'go list'.
var listSem = make(chan semToken, runtime.GOMAXPROCS(0))
type semToken struct{}
// loadImports populates w with information about the packages in the standard
// library and the packages they themselves import in w's build context.
//
@ -461,6 +473,9 @@ func (w *Walker) loadImports() {
imports, ok := listCache.Load(name)
if !ok {
listSem <- semToken{}
defer func() { <-listSem }()
cmd := exec.Command(goCmd(), "list", "-e", "-deps", "-json", "std")
cmd.Env = listEnv(w.context)
out, err := cmd.CombinedOutput()
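listSem above is the standard buffered-channel counting semaphore; for reference, a self-contained sketch of the same pattern:

	package main

	import (
		"fmt"
		"runtime"
		"sync"
	)

	type semToken struct{}

	func main() {
		// Same idea as listSem: a buffered channel caps the number of
		// goroutines inside the critical section at GOMAXPROCS.
		sem := make(chan semToken, runtime.GOMAXPROCS(0))

		var wg sync.WaitGroup
		for i := 0; i < 32; i++ {
			wg.Add(1)
			go func(i int) {
				defer wg.Done()
				sem <- semToken{}        // acquire
				defer func() { <-sem }() // release
				fmt.Println("running job", i)
			}(i)
		}
		wg.Wait()
	}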

View file

@ -140,7 +140,6 @@ func TestCompareAPI(t *testing.T) {
name: "contexts reconverging",
required: []string{
"A",
"pkg syscall (darwin-386), type RawSockaddrInet6 struct",
"pkg syscall (darwin-amd64), type RawSockaddrInet6 struct",
},
features: []string{

View file

@ -390,7 +390,12 @@ func TestARM64Errors(t *testing.T) {
}
func TestAMD64EndToEnd(t *testing.T) {
testEndToEnd(t, "amd64", "amd64")
defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64)
for _, goamd64 := range []string{"normaljumps", "alignedjumps"} {
t.Logf("GOAMD64=%s", goamd64)
objabi.GOAMD64 = goamd64
testEndToEnd(t, "amd64", "amd64")
}
}
func Test386Encoder(t *testing.T) {

View file

@ -748,6 +748,14 @@ label1:
COPY R2,R1
PASTECC R2,R1
// Modulo signed/unsigned double/word X-form
// <MNEMONIC> RA,RB,RT produces
// <mnemonic> RT,RA,RB
MODUD R3,R4,R5
MODUW R3,R4,R5
MODSD R3,R4,R5
MODSW R3,R4,R5
// VMX instructions
// Described as:
@ -950,12 +958,19 @@ label1:
VCMPGTSDCC V3, V2, V1
VCMPNEZB V3, V2, V1
VCMPNEZBCC V3, V2, V1
VCMPNEB V3, V2, V1
VCMPNEBCC V3, V2, V1
VCMPNEH V3, V2, V1
VCMPNEHCC V3, V2, V1
VCMPNEW V3, V2, V1
VCMPNEWCC V3, V2, V1
// Vector permute, VA-form
// <MNEMONIC> VRA,VRB,VRC,VRT produces
// <mnemonic> VRT,VRA,VRB,VRC
VPERM V3, V2, V1, V0
VPERMXOR V3, V2, V1, V0
VPERMR V3, V2, V1, V0
// Vector bit permute, VX-form
// <MNEMONIC> VRA,VRB,VRT produces
@ -1019,6 +1034,9 @@ label1:
LXSIWAX (R1)(R2*1), VS0
LXSIWZX (R1)(R2*1), VS0
// VSX load with length X-form (also left-justified)
LXVL R3,R4, VS0
LXVLL R3,R4, VS0
// VSX load, DQ-form
// <MNEMONIC> DQ(RA), XS produces
// <mnemonic> XS, DQ(RA)
@ -1039,6 +1057,10 @@ label1:
// <mnemonic> XS, DQ(RA)
STXV VS63, -32752(R1)
// VSX store with length, X-form (also left-justified)
STXVL VS0, R3,R4
STXVLL VS0, R3,R4
// VSX move from VSR, XX1-form
// <MNEMONIC> XS,RA produces
// <mnemonic> RA,XS
@ -1076,6 +1098,7 @@ label1:
XXLNOR VS0,VS1,VS32
XXLORQ VS0,VS1,VS32
XXLXOR VS0,VS1,VS32
XXLOR VS0,VS1,VS32
// VSX select, XX4-form
// <MNEMONIC> XA,XB,XC,XT produces
@ -1092,6 +1115,7 @@ label1:
// <MNEMONIC> XB,UIM,XT produces
// <mnemonic> XT,XB,UIM
XXSPLTW VS0,$3,VS32
XXSPLTIB $26,VS0
// VSX permute, XX3-form
// <MNEMONIC> XA,XB,XT produces
@ -1108,6 +1132,14 @@ label1:
// <mnemonic> XT,XA,XB,SHW
XXSLDWI VS0,VS1,$3,VS32
// VSX byte-reverse XX2-form
// <MNEMONIC> XB,XT produces
// <mnemonic> XT,XB
XXBRQ VS0,VS1
XXBRD VS0,VS1
XXBRW VS0,VS1
XXBRH VS0,VS1
// VSX scalar FP-FP conversion, XX2-form
// <MNEMONIC> XB,XT produces
// <mnemonic> XT,XB

View file

@ -19,8 +19,66 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
MOVD $-32767, R5 // 38a08001
MOVD $-32768, R6 // 38c08000
MOVD $1234567, R5 // 6405001260a5d687
MOVW $1, R3 // 38600001
MOVW $-1, R4 // 3880ffff
MOVW $65535, R5 // 6005ffff
MOVW $65536, R6 // 64060001
MOVW $-32767, R5 // 38a08001
MOVW $-32768, R6 // 38c08000
MOVW $1234567, R5 // 6405001260a5d687
MOVD 8(R3), R4 // e8830008
MOVD (R3)(R4), R5 // 7ca4182a
MOVW 4(R3), R4 // e8830006
MOVW (R3)(R4), R5 // 7ca41aaa
MOVWZ 4(R3), R4 // 80830004
MOVWZ (R3)(R4), R5 // 7ca4182e
MOVH 4(R3), R4 // a8830004
MOVH (R3)(R4), R5 // 7ca41aae
MOVHZ 2(R3), R4 // a0830002
MOVHZ (R3)(R4), R5 // 7ca41a2e
MOVB 1(R3), R4 // 888300017c840774
MOVB (R3)(R4), R5 // 7ca418ae7ca50774
MOVBZ 1(R3), R4 // 88830001
MOVBZ (R3)(R4), R5 // 7ca418ae
MOVDBR (R3)(R4), R5 // 7ca41c28
MOVWBR (R3)(R4), R5 // 7ca41c2c
MOVHBR (R3)(R4), R5 // 7ca41e2c
MOVDU 8(R3), R4 // e8830009
MOVDU (R3)(R4), R5 // 7ca4186a
MOVWU (R3)(R4), R5 // 7ca41aea
MOVWZU 4(R3), R4 // 84830004
MOVWZU (R3)(R4), R5 // 7ca4186e
MOVHU 2(R3), R4 // ac830002
MOVHU (R3)(R4), R5 // 7ca41aee
MOVHZU 2(R3), R4 // a4830002
MOVHZU (R3)(R4), R5 // 7ca41a6e
MOVBU 1(R3), R4 // 8c8300017c840774
MOVBU (R3)(R4), R5 // 7ca418ee7ca50774
MOVBZU 1(R3), R4 // 8c830001
MOVBZU (R3)(R4), R5 // 7ca418ee
MOVD R4, 8(R3) // f8830008
MOVD R5, (R3)(R4) // 7ca4192a
MOVW R4, 4(R3) // 90830004
MOVW R5, (R3)(R4) // 7ca4192e
MOVH R4, 2(R3) // b0830002
MOVH R5, (R3)(R4) // 7ca41b2e
MOVB R4, 1(R3) // 98830001
MOVB R5, (R3)(R4) // 7ca419ae
MOVDBR R5, (R3)(R4) // 7ca41d28
MOVWBR R5, (R3)(R4) // 7ca41d2c
MOVHBR R5, (R3)(R4) // 7ca41f2c
MOVDU R4, 8(R3) // f8830009
MOVDU R5, (R3)(R4) // 7ca4196a
MOVWU R4, 4(R3) // 94830004
MOVWU R5, (R3)(R4) // 7ca4196e
MOVHU R4, 2(R3) // b4830002
MOVHU R5, (R3)(R4) // 7ca41b6e
MOVBU R4, 1(R3) // 9c830001
MOVBU R5, (R3)(R4) // 7ca419ee
// add constants
ADD $1, R3 // 38630001
ADD $1, R3, R4 // 38830001
ADD $-1, R4 // 3884ffff
@ -35,8 +93,9 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
ADD $-32768, R6, R5 // 38a68000
ADD $1234567, R5 // 641f001263ffd6877cbf2a14
ADD $1234567, R5, R6 // 641f001263ffd6877cdf2a14
ADDIS $8, R3 // 3c630008
ADDIS $1000, R3, R4 // 3c8303e8
// and constants
ANDCC $1, R3 // 70630001
ANDCC $1, R3, R4 // 70640001
ANDCC $-1, R4 // 3be0ffff7fe42039
@ -51,8 +110,9 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
ANDCC $-32768, R5, R6 // 3be080007fe62839
ANDCC $1234567, R5 // 641f001263ffd6877fe52839
ANDCC $1234567, R5, R6 // 641f001263ffd6877fe62839
ANDISCC $1, R3 // 74630001
ANDISCC $1000, R3, R4 // 746403e8
// or constants
OR $1, R3 // 60630001
OR $1, R3, R4 // 60640001
OR $-1, R4 // 3be0ffff7fe42378
@ -68,7 +128,6 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
OR $1234567, R5 // 641f001263ffd6877fe52b78
OR $1234567, R5, R3 // 641f001263ffd6877fe32b78
// or constants
XOR $1, R3 // 68630001
XOR $1, R3, R4 // 68640001
XOR $-1, R4 // 3be0ffff7fe42278
@ -84,6 +143,177 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
XOR $1234567, R5 // 641f001263ffd6877fe52a78
XOR $1234567, R5, R3 // 641f001263ffd6877fe32a78
// TODO: the order of CR operands doesn't match
CMP R3, R4 // 7c232000
CMPU R3, R4 // 7c232040
CMPW R3, R4 // 7c032000
CMPWU R3, R4 // 7c032040
// TODO: constants for ADDC?
ADD R3, R4 // 7c841a14
ADD R3, R4, R5 // 7ca41a14
ADDC R3, R4 // 7c841814
ADDC R3, R4, R5 // 7ca41814
ADDE R3, R4 // 7c841914
ADDECC R3, R4 // 7c841915
ADDEV R3, R4 // 7c841d14
ADDEVCC R3, R4 // 7c841d15
ADDV R3, R4 // 7c841e14
ADDVCC R3, R4 // 7c841e15
ADDCCC R3, R4, R5 // 7ca41815
ADDME R3, R4 // 7c8301d4
ADDMECC R3, R4 // 7c8301d5
ADDMEV R3, R4 // 7c8305d4
ADDMEVCC R3, R4 // 7c8305d5
ADDCV R3, R4 // 7c841c14
ADDCVCC R3, R4 // 7c841c15
ADDZE R3, R4 // 7c830194
ADDZECC R3, R4 // 7c830195
ADDZEV R3, R4 // 7c830594
ADDZEVCC R3, R4 // 7c830595
SUBME R3, R4 // 7c8301d0
SUBMECC R3, R4 // 7c8301d1
SUBMEV R3, R4 // 7c8305d0
SUBZE R3, R4 // 7c830190
SUBZECC R3, R4 // 7c830191
SUBZEV R3, R4 // 7c830590
SUBZEVCC R3, R4 // 7c830591
AND R3, R4 // 7c841838
AND R3, R4, R5 // 7c851838
ANDN R3, R4, R5 // 7c851878
ANDCC R3, R4, R5 // 7c851839
OR R3, R4 // 7c841b78
OR R3, R4, R5 // 7c851b78
ORN R3, R4, R5 // 7c851b38
ORCC R3, R4, R5 // 7c851b79
XOR R3, R4 // 7c841a78
XOR R3, R4, R5 // 7c851a78
XORCC R3, R4, R5 // 7c851a79
NAND R3, R4, R5 // 7c851bb8
NANDCC R3, R4, R5 // 7c851bb9
EQV R3, R4, R5 // 7c851a38
EQVCC R3, R4, R5 // 7c851a39
NOR R3, R4, R5 // 7c8518f8
NORCC R3, R4, R5 // 7c8518f9
SUB R3, R4 // 7c832050
SUB R3, R4, R5 // 7ca32050
SUBC R3, R4 // 7c832010
SUBC R3, R4, R5 // 7ca32010
MULLW R3, R4 // 7c8419d6
MULLW R3, R4, R5 // 7ca419d6
MULLWCC R3, R4, R5 // 7ca419d7
MULHW R3, R4, R5 // 7ca41896
MULHWU R3, R4, R5 // 7ca41816
MULLD R3, R4 // 7c8419d2
MULLD R4, R4, R5 // 7ca421d2
MULLDCC R3, R4, R5 // 7ca419d3
MULHD R3, R4, R5 // 7ca41892
MULHDCC R3, R4, R5 // 7ca41893
MULLWV R3, R4 // 7c841dd6
MULLWV R3, R4, R5 // 7ca41dd6
MULLWVCC R3, R4, R5 // 7ca41dd7
MULHWUCC R3, R4, R5 // 7ca41817
MULLDV R3, R4, R5 // 7ca41dd2
MULLDVCC R3, R4, R5 // 7ca41dd3
DIVD R3,R4 // 7c841bd2
DIVD R3, R4, R5 // 7ca41bd2
DIVDCC R3,R4, R5 // 7ca41bd3
DIVDU R3, R4, R5 // 7ca41b92
DIVDV R3, R4, R5 // 7ca41fd2
DIVDUCC R3, R4, R5 // 7ca41b93
DIVDVCC R3, R4, R5 // 7ca41fd3
DIVDUV R3, R4, R5 // 7ca41f92
DIVDUVCC R3, R4, R5 // 7ca41f93
DIVDE R3, R4, R5 // 7ca41b52
DIVDECC R3, R4, R5 // 7ca41b53
DIVDEU R3, R4, R5 // 7ca41b12
DIVDEUCC R3, R4, R5 // 7ca41b13
REM R3, R4, R5 // 7fe41bd67fff19d67cbf2050
REMU R3, R4, R5 // 7fe41b967fff19d67bff00287cbf2050
REMD R3, R4, R5 // 7fe41bd27fff19d27cbf2050
REMDU R3, R4, R5 // 7fe41b927fff19d27cbf2050
MODUD R3, R4, R5 // 7ca41a12
MODUW R3, R4, R5 // 7ca41a16
MODSD R3, R4, R5 // 7ca41e12
MODSW R3, R4, R5 // 7ca41e16
SLW $8, R3, R4 // 5464402e
SLW R3, R4, R5 // 7c851830
SLWCC R3, R4 // 7c841831
SLD $16, R3, R4 // 786483e4
SLD R3, R4, R5 // 7c851836
SLDCC R3, R4 // 7c841837
SRW $8, R3, R4 // 5464c23e
SRW R3, R4, R5 // 7c851c30
SRWCC R3, R4 // 7c841c31
SRAW $8, R3, R4 // 7c644670
SRAW R3, R4, R5 // 7c851e30
SRAWCC R3, R4 // 7c841e31
SRD $16, R3, R4 // 78648402
SRD R3, R4, R5 // 7c851c36
SRDCC R3, R4 // 7c841c37
SRAD $16, R3, R4 // 7c648674
SRAD R3, R4, R5 // 7c851e34
SRDCC R3, R4 // 7c841c37
ROTLW $16, R3, R4 // 5464803e
ROTLW R3, R4, R5 // 5c85183e
RLWMI $7, R3, $65535, R6 // 50663c3e
RLWMICC $7, R3, $65535, R6 // 50663c3f
RLWNM $3, R4, $7, R6 // 54861f7e
RLWNMCC $3, R4, $7, R6 // 54861f7f
RLDMI $0, R4, $7, R6 // 7886076c
RLDMICC $0, R4, $7, R6 // 7886076d
RLDIMI $0, R4, $7, R6 // 788601cc
RLDIMICC $0, R4, $7, R6 // 788601cd
RLDC $0, R4, $15, R6 // 78860728
RLDCCC $0, R4, $15, R6 // 78860729
RLDCL $0, R4, $7, R6 // 78860770
RLDCLCC $0, R4, $15, R6 // 78860721
RLDCR $0, R4, $-16, R6 // 788606f2
RLDCRCC $0, R4, $-16, R6 // 788606f3
RLDICL $0, R4, $15, R6 // 788603c0
RLDICLCC $0, R4, $15, R6 // 788603c1
RLDICR $0, R4, $15, R6 // 788603c4
RLDICRCC $0, R4, $15, R6 // 788603c5
BEQ 0(PC) // 41820000
BGE 0(PC) // 40800000
BGT 4(PC) // 41810030
BLE 0(PC) // 40810000
BLT 0(PC) // 41800000
BNE 0(PC) // 40820000
JMP 8(PC) // 48000020
CRAND CR1, CR2, CR3 // 4c620a02
CRANDN CR1, CR2, CR3 // 4c620902
CREQV CR1, CR2, CR3 // 4c620a42
CRNAND CR1, CR2, CR3 // 4c6209c2
CRNOR CR1, CR2, CR3 // 4c620842
CROR CR1, CR2, CR3 // 4c620b82
CRORN CR1, CR2, CR3 // 4c620b42
CRXOR CR1, CR2, CR3 // 4c620982
ISEL $1, R3, R4, R5 // 7ca3205e
ISEL $0, R3, R4, R5 // 7ca3201e
ISEL $2, R3, R4, R5 // 7ca3209e
ISEL $3, R3, R4, R5 // 7ca320de
ISEL $4, R3, R4, R5 // 7ca3211e
POPCNTB R3, R4 // 7c6400f4
POPCNTW R3, R4 // 7c6402f4
POPCNTD R3, R4 // 7c6403f4
PASTECC R3, R4 // 7c23270d
COPY R3, R4 // 7c23260c
// load-and-reserve
LBAR (R4)(R3*1),$1,R5 // 7ca32069
LBAR (R4),$0,R5 // 7ca02068
@ -98,7 +328,304 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
LDAR (R4),$0,R5 // 7ca020a8
LDAR (R3),R5 // 7ca018a8
STBCCC R3, (R4)(R5) // 7c65256d
STWCCC R3, (R4)(R5) // 7c65212d
STDCCC R3, (R4)(R5) // 7c6521ad
STHCCC R3, (R4)(R5)
SYNC // 7c0004ac
ISYNC // 4c00012c
LWSYNC // 7c2004ac
DCBF (R3)(R4) // 7c0418ac
DCBI (R3)(R4) // 7c041bac
DCBST (R3)(R4) // 7c04186c
DCBZ (R3)(R4) // 7c041fec
DCBT (R3)(R4) // 7c041a2c
ICBI (R3)(R4) // 7c041fac
// float constants
FMOVD $(0.0), F1 // f0210cd0
FMOVD $(-0.0), F1 // f0210cd0fc200850
FMOVD 8(R3), F1 // c8230008
FMOVD (R3)(R4), F1 // 7c241cae
FMOVDU 8(R3), F1 // cc230008
FMOVDU (R3)(R4), F1 // 7c241cee
FMOVS 4(R3), F1 // c0230004
FMOVS (R3)(R4), F1 // 7c241c2e
FMOVSU 4(R3), F1 // c4230004
FMOVSU (R3)(R4), F1 // 7c241c6e
FMOVD F1, 8(R3) // d8230008
FMOVD F1, (R3)(R4) // 7c241dae
FMOVDU F1, 8(R3) // dc230008
FMOVDU F1, (R3)(R4) // 7c241dee
FMOVS F1, 4(R3) // d0230004
FMOVS F1, (R3)(R4) // 7c241d2e
FMOVSU F1, 4(R3) // d4230004
FMOVSU F1, (R3)(R4) // 7c241d6e
FADD F1, F2 // fc42082a
FADD F1, F2, F3 // fc62082a
FADDCC F1, F2, F3 // fc62082b
FADDS F1, F2 // ec42082a
FADDS F1, F2, F3 // ec62082a
FADDSCC F1, F2, F3 // ec62082b
FSUB F1, F2 // fc420828
FSUB F1, F2, F3 // fc620828
FSUBCC F1, F2, F3 // fc620829
FSUBS F1, F2 // ec420828
FSUBS F1, F2, F3 // ec620828
FSUBCC F1, F2, F3 // fc620829
FMUL F1, F2 // fc420072
FMUL F1, F2, F3 // fc620072
FMULCC F1, F2, F3 // fc620073
FMULS F1, F2 // ec420072
FMULS F1, F2, F3 // ec620072
FMULSCC F1, F2, F3 // ec620073
FDIV F1, F2 // fc420824
FDIV F1, F2, F3 // fc620824
FDIVCC F1, F2, F3 // fc620825
FDIVS F1, F2 // ec420824
FDIVS F1, F2, F3 // ec620824
FDIVSCC F1, F2, F3 // ec620825
FMADD F1, F2, F3, F4 // fc8110fa
FMADDCC F1, F2, F3, F4 // fc8110fb
FMADDS F1, F2, F3, F4 // ec8110fa
FMADDSCC F1, F2, F3, F4 // ec8110fb
FMSUB F1, F2, F3, F4 // fc8110f8
FMSUBCC F1, F2, F3, F4 // fc8110f9
FMSUBS F1, F2, F3, F4 // ec8110f8
FMSUBSCC F1, F2, F3, F4 // ec8110f9
FNMADD F1, F2, F3, F4 // fc8110fe
FNMADDCC F1, F2, F3, F4 // fc8110ff
FNMADDS F1, F2, F3, F4 // ec8110fe
FNMADDSCC F1, F2, F3, F4 // ec8110ff
FNMSUB F1, F2, F3, F4 // fc8110fc
FNMSUBCC F1, F2, F3, F4 // fc8110fd
FNMSUBS F1, F2, F3, F4 // ec8110fc
FNMSUBSCC F1, F2, F3, F4 // ec8110fd
FSEL F1, F2, F3, F4 // fc8110ee
FSELCC F1, F2, F3, F4 // fc8110ef
FABS F1, F2 // fc400a10
FABSCC F1, F2 // fc400a11
FNEG F1, F2 // fc400850
FABSCC F1, F2 // fc400a11
FRSP F1, F2 // fc400818
FRSPCC F1, F2 // fc400819
FCTIW F1, F2 // fc40081c
FCTIWCC F1, F2 // fc40081d
FCTIWZ F1, F2 // fc40081e
FCTIWZCC F1, F2 // fc40081f
FCTID F1, F2 // fc400e5c
FCTIDCC F1, F2 // fc400e5d
FCTIDZ F1, F2 // fc400e5e
FCTIDZCC F1, F2 // fc400e5f
FCFID F1, F2 // fc400e9c
FCFIDCC F1, F2 // fc400e9d
FCFIDU F1, F2 // fc400f9c
FCFIDUCC F1, F2 // fc400f9d
FCFIDS F1, F2 // ec400e9c
FCFIDSCC F1, F2 // ec400e9d
FRES F1, F2 // ec400830
FRESCC F1, F2 // ec400831
FRIM F1, F2 // fc400bd0
FRIMCC F1, F2 // fc400bd1
FRIP F1, F2 // fc400b90
FRIPCC F1, F2 // fc400b91
FRIZ F1, F2 // fc400b50
FRIZCC F1, F2 // fc400b51
FRIN F1, F2 // fc400b10
FRINCC F1, F2 // fc400b11
FRSQRTE F1, F2 // fc400834
FRSQRTECC F1, F2 // fc400835
FSQRT F1, F2 // fc40082c
FSQRTCC F1, F2 // fc40082d
FSQRTS F1, F2 // ec40082c
FSQRTSCC F1, F2 // ec40082d
FCPSGN F1, F2 // fc420810
FCPSGNCC F1, F2 // fc420811
FCMPO F1, F2 // fc011040
FCMPU F1, F2 // fc011000
LVX (R3)(R4), V1 // 7c2418ce
LVXL (R3)(R4), V1 // 7c241ace
LVSL (R3)(R4), V1 // 7c24180c
LVSR (R3)(R4), V1 // 7c24184c
LVEBX (R3)(R4), V1 // 7c24180e
LVEHX (R3)(R4), V1 // 7c24184e
LVEWX (R3)(R4), V1 // 7c24188e
STVX V1, (R3)(R4) // 7c2419ce
STVXL V1, (R3)(R4) // 7c241bce
STVEBX V1, (R3)(R4) // 7c24190e
STVEHX V1, (R3)(R4) // 7c24194e
STVEWX V1, (R3)(R4) // 7c24198e
VAND V1, V2, V3 // 10611404
VANDC V1, V2, V3 // 10611444
VNAND V1, V2, V3 // 10611584
VOR V1, V2, V3 // 10611484
VORC V1, V2, V3 // 10611544
VXOR V1, V2, V3 // 106114c4
VNOR V1, V2, V3 // 10611504
VEQV V1, V2, V3 // 10611684
VADDUBM V1, V2, V3 // 10611000
VADDUHM V1, V2, V3 // 10611040
VADDUWM V1, V2, V3 // 10611080
VADDUDM V1, V2, V3 // 106110c0
VADDUQM V1, V2, V3 // 10611100
VADDCUQ V1, V2, V3 // 10611140
VADDCUW V1, V2, V3 // 10611180
VADDUBS V1, V2, V3 // 10611200
VADDUHS V1, V2, V3 // 10611240
VADDUWS V1, V2, V3 // 10611280
VSUBUBM V1, V2, V3 // 10611400
VSUBUHM V1, V2, V3 // 10611440
VSUBUWM V1, V2, V3 // 10611480
VSUBUDM V1, V2, V3 // 106114c0
VSUBUQM V1, V2, V3 // 10611500
VSUBCUQ V1, V2, V3 // 10611540
VSUBCUW V1, V2, V3 // 10611580
VSUBUBS V1, V2, V3 // 10611600
VSUBUHS V1, V2, V3 // 10611640
VSUBUWS V1, V2, V3 // 10611680
VSUBSBS V1, V2, V3 // 10611700
VSUBSHS V1, V2, V3 // 10611740
VSUBSWS V1, V2, V3 // 10611780
VSUBEUQM V1, V2, V3, V4 // 108110fe
VSUBECUQ V1, V2, V3, V4 // 108110ff
VMULESB V1, V2, V3 // 10611308
VMULOSB V1, V2, V3 // 10611108
VMULEUB V1, V2, V3 // 10611208
VMULOUB V1, V2, V3 // 10611008
VMULESH V1, V2, V3 // 10611348
VMULOSH V1, V2, V3 // 10611148
VMULEUH V1, V2, V3 // 10611248
VMULOUH V1, V2, V3 // 10611048
VMULESW V1, V2, V3 // 10611388
VMULOSW V1, V2, V3 // 10611188
VMULEUW V1, V2, V3 // 10611288
VMULOUW V1, V2, V3 // 10611088
VMULUWM V1, V2, V3 // 10611089
VPMSUMB V1, V2, V3 // 10611408
VPMSUMH V1, V2, V3 // 10611448
VPMSUMW V1, V2, V3 // 10611488
VPMSUMD V1, V2, V3 // 106114c8
VMSUMUDM V1, V2, V3, V4 // 108110e3
VRLB V1, V2, V3 // 10611004
VRLH V1, V2, V3 // 10611044
VRLW V1, V2, V3 // 10611084
VRLD V1, V2, V3 // 106110c4
VSLB V1, V2, V3 // 10611104
VSLH V1, V2, V3 // 10611144
VSLW V1, V2, V3 // 10611184
VSL V1, V2, V3 // 106111c4
VSLO V1, V2, V3 // 1061140c
VSRB V1, V2, V3 // 10611204
VSRH V1, V2, V3 // 10611244
VSRW V1, V2, V3 // 10611284
VSR V1, V2, V3 // 106112c4
VSRO V1, V2, V3 // 1061144c
VSLD V1, V2, V3 // 106115c4
VSRAB V1, V2, V3 // 10611304
VSRAH V1, V2, V3 // 10611344
VSRAW V1, V2, V3 // 10611384
VSRAD V1, V2, V3 // 106113c4
VSLDOI $3, V1, V2, V3 // 106110ec
VCLZB V1, V2 // 10400f02
VCLZH V1, V2 // 10400f42
VCLZW V1, V2 // 10400f82
VCLZD V1, V2 // 10400fc2
VPOPCNTB V1, V2 // 10400f03
VPOPCNTH V1, V2 // 10400f43
VPOPCNTW V1, V2 // 10400f83
VPOPCNTD V1, V2 // 10400fc3
VCMPEQUB V1, V2, V3 // 10611006
VCMPEQUBCC V1, V2, V3 // 10611406
VCMPEQUH V1, V2, V3 // 10611046
VCMPEQUHCC V1, V2, V3 // 10611446
VCMPEQUW V1, V2, V3 // 10611086
VCMPEQUWCC V1, V2, V3 // 10611486
VCMPEQUD V1, V2, V3 // 106110c7
VCMPEQUDCC V1, V2, V3 // 106114c7
VCMPGTUB V1, V2, V3 // 10611206
VCMPGTUBCC V1, V2, V3 // 10611606
VCMPGTUH V1, V2, V3 // 10611246
VCMPGTUHCC V1, V2, V3 // 10611646
VCMPGTUW V1, V2, V3 // 10611286
VCMPGTUWCC V1, V2, V3 // 10611686
VCMPGTUD V1, V2, V3 // 106112c7
VCMPGTUDCC V1, V2, V3 // 106116c7
VCMPGTSB V1, V2, V3 // 10611306
VCMPGTSBCC V1, V2, V3 // 10611706
VCMPGTSH V1, V2, V3 // 10611346
VCMPGTSHCC V1, V2, V3 // 10611746
VCMPGTSW V1, V2, V3 // 10611386
VCMPGTSWCC V1, V2, V3 // 10611786
VCMPGTSD V1, V2, V3 // 106113c7
VCMPGTSDCC V1, V2, V3 // 106117c7
VCMPNEZB V1, V2, V3 // 10611107
VCMPNEZBCC V1, V2, V3 // 10611507
VCMPNEB V1, V2, V3 // 10611007
VCMPNEBCC V1, V2, V3 // 10611407
VCMPNEH V1, V2, V3 // 10611047
VCMPNEHCC V1, V2, V3 // 10611447
VCMPNEW V1, V2, V3 // 10611087
VCMPNEWCC V1, V2, V3 // 10611487
VPERM V1, V2, V3, V4 // 108110eb
VPERMR V1, V2, V3, V4 // 108110fb
VPERMXOR V1, V2, V3, V4 // 108110ed
VBPERMQ V1, V2, V3 // 1061154c
VBPERMD V1, V2, V3 // 106115cc
VSEL V1, V2, V3, V4 // 108110ea
VSPLTB $1, V1, V2 // 10410a0c
VSPLTH $1, V1, V2 // 10410a4c
VSPLTW $1, V1, V2 // 10410a8c
VSPLTISB $1, V1 // 1021030c
VSPLTISW $1, V1 // 1021038c
VSPLTISH $1, V1 // 1021034c
VCIPHER V1, V2, V3 // 10611508
VCIPHERLAST V1, V2, V3 // 10611509
VNCIPHER V1, V2, V3 // 10611548
VNCIPHERLAST V1, V2, V3 // 10611549
VSBOX V1, V2 // 104105c8
VSHASIGMAW $1, V1, $15, V2 // 10418e82
VSHASIGMAD $2, V1, $15, V2 // 104196c2
LXVD2X (R3)(R4), VS1 // 7c241e98
LXV 16(R3), VS1 // f4230011
LXVL R3, R4, VS1 // 7c23221a
LXVLL R3, R4, VS1 // 7c23225a
LXSDX (R3)(R4), VS1 // 7c241c98
STXVD2X VS1, (R3)(R4) // 7c241f98
STXV VS1,16(R3) // f4230015
STXVL VS1, R3, R4 // 7c23231a
STXVLL VS1, R3, R4 // 7c23235a
STXSDX VS1, (R3)(R4) // 7c241d98
LXSIWAX (R3)(R4), VS1 // 7c241898
STXSIWX VS1, (R3)(R4) // 7c241918
MFVSRD VS1, R3 // 7c230066
MTVSRD R3, VS1 // 7c230166
XXLAND VS1, VS2, VS3 // f0611410
XXLOR VS1, VS2, VS3 // f0611490
XXLORC VS1, VS2, VS3 // f0611550
XXLXOR VS1, VS2, VS3 // f06114d0
XXSEL VS1, VS2, VS3, VS4 // f08110f0
XXMRGHW VS1, VS2, VS3 // f0611090
XXSPLTW VS1, $1, VS2 // f0410a90
XXPERM VS1, VS2, VS3 // f06110d0
XXSLDWI VS1, VS2, $1, VS3 // f0611110
XSCVDPSP VS1, VS2 // f0400c24
XVCVDPSP VS1, VS2 // f0400e24
XSCVSXDDP VS1, VS2 // f0400de0
XVCVDPSXDS VS1, VS2 // f0400f60
XVCVSXDDP VS1, VS2 // f0400fe0
MOVD R3, LR // 7c6803a6
MOVD R3, CTR // 7c6903a6
MOVD R3, XER // 7c6103a6
MOVD LR, R3 // 7c6802a6
MOVD CTR, R3 // 7c6902a6
MOVD XER, R3 // 7c6102a6
MOVFL CR3, CR1 // 4c8c0000
RET

View file

@ -89,6 +89,7 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
ADDW R1, R2, R3 // b9f81032
ADDW $8192, R1 // a71a2000
ADDW $8192, R1, R2 // ec21200000d8
ADDE R1, R2 // b9880021
SUB R3, R4 // b9090043
SUB R3, R4, R5 // b9e93054
SUB $8192, R3 // a73be000
@ -183,6 +184,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
ADDW 4095(R7), R8 // 5a807fff
ADDW -1(R1), R2 // e3201fffff5a
ADDW 4096(R3), R4 // e3403000015a
ADDE 4096(R3), R4 // e34030000188
ADDE 4096(R3)(R2*1), R4 // e34230000188
ADDE 524288(R3)(R4*1), R5 // c0a10008000041aa4000e35a30000088
MULLD (R1)(R2*1), R3 // e3321000000c
MULLW (R3)(R4*1), R5 // 71543000
MULLW 4096(R3), R4 // e34030000151
@ -259,6 +263,7 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
NC $8, (R15), n-8(SP) // d407f010f000
OC $8, (R15), n-8(SP) // d607f010f000
MVC $8, (R15), n-8(SP) // d207f010f000
MVCIN $8, (R15), n-8(SP) // e807f010f000
CLC $8, (R15), n-8(SP) // d507f000f010
XC $256, -8(R15), -8(R15) // b90400afc2a8fffffff8d7ffa000a000
MVC $256, 8192(R1), 8192(R2) // b90400a2c2a800002000b90400b1c2b800002000d2ffa000b000

View file

@ -23,9 +23,10 @@ var (
Dynlink = flag.Bool("dynlink", false, "support references to Go symbols defined in other shared libraries")
AllErrors = flag.Bool("e", false, "no limit on number of errors reported")
SymABIs = flag.Bool("gensymabis", false, "write symbol ABI information to output file, don't assemble")
Importpath = flag.String("p", "", "set expected package import to path")
Spectre = flag.String("spectre", "", "enable spectre mitigations in `list` (all, ret)")
Go115Newobj = flag.Bool("go115newobj", true, "use new object file format")
)
var (

View file

@ -40,7 +40,8 @@ func main() {
}
ctxt.Flag_dynlink = *flags.Dynlink
ctxt.Flag_shared = *flags.Shared || *flags.Dynlink
ctxt.Flag_go115newobj = *flags.Go115Newobj
ctxt.IsAsm = true
switch *flags.Spectre {
default:
log.Printf("unknown setting -spectre=%s", *flags.Spectre)
@ -87,7 +88,7 @@ func main() {
pList.Firstpc, ok = parser.Parse()
// reports errors to parser.Errorf
if ok {
obj.Flushplist(ctxt, pList, nil, *flags.Importpath)
}
}
if !ok {

View file

@ -182,6 +182,9 @@ func (p *Package) Translate(f *File) {
numTypedefs = len(p.typedefs)
// Also ask about any typedefs we've seen so far.
for _, info := range p.typedefList {
if f.Name[info.typedef] != nil {
continue
}
n := &Name{
Go: info.typedef,
C: info.typedef,
@ -333,7 +336,7 @@ func (p *Package) guessKinds(f *File) []*Name {
// void __cgo_f_xxx_5(void) { static const char __cgo_undefined__5[] = (name); }
//
// If we see an error at not-declared:xxx, the corresponding name is not declared.
// If we see an error at not-type:xxx, the corresponding name is not a type.
// If we see an error at not-int-const:xxx, the corresponding name is not an integer constant.
// If we see an error at not-num-const:xxx, the corresponding name is not a number constant.
// If we see an error at not-str-lit:xxx, the corresponding name is not a string literal.
@ -710,6 +713,9 @@ func (p *Package) prepareNames(f *File) {
}
}
p.mangleName(n)
if n.Kind == "type" && typedef[n.Mangle] == nil {
typedef[n.Mangle] = n.Type
}
}
}
@ -1348,6 +1354,9 @@ func (p *Package) rewriteRef(f *File) {
if *godefs {
// Substitute definition for mangled type name.
if r.Name.Type != nil {
expr = r.Name.Type.Go
}
if id, ok := expr.(*ast.Ident); ok {
if t := typedef[id.Name]; t != nil {
expr = t.Go
@ -1413,9 +1422,7 @@ func (p *Package) rewriteName(f *File, r *Ref) ast.Expr {
r.Context = ctxType
if r.Name.Type == nil {
error_(r.Pos(), "invalid conversion to C.%s: undefined C type '%s'", fixGo(r.Name.Go), r.Name.C)
break
}
expr = r.Name.Type.Go
break
}
error_(r.Pos(), "call of non-function C.%s", fixGo(r.Name.Go))
@ -1472,9 +1479,7 @@ func (p *Package) rewriteName(f *File, r *Ref) ast.Expr {
// Okay - might be new(T)
if r.Name.Type == nil {
error_(r.Pos(), "expression C.%s: undefined C type '%s'", fixGo(r.Name.Go), r.Name.C)
break
}
expr = r.Name.Type.Go
case "var": case "var":
expr = &ast.StarExpr{Star: (*r.Expr).Pos(), X: expr} expr = &ast.StarExpr{Star: (*r.Expr).Pos(), X: expr}
case "macro": case "macro":
@ -1493,8 +1498,6 @@ func (p *Package) rewriteName(f *File, r *Ref) ast.Expr {
// Use of C.enum_x, C.struct_x or C.union_x without C definition.
// GCC won't raise an error when using pointers to such unknown types.
error_(r.Pos(), "type C.%s: undefined C type '%s'", fixGo(r.Name.Go), r.Name.C)
} else {
expr = r.Name.Type.Go
}
default:
if r.Name.Kind == "func" {
@ -2060,6 +2063,10 @@ var goIdent = make(map[string]*ast.Ident)
// that may contain a pointer. This is used for cgo pointer checking.
var unionWithPointer = make(map[ast.Expr]bool)
// anonymousStructTag provides a consistent tag for an anonymous struct.
// The same dwarf.StructType pointer will always get the same tag.
var anonymousStructTag = make(map[*dwarf.StructType]string)
func (c *typeConv) Init(ptrSize, intSize int64) {
c.ptrSize = ptrSize
c.intSize = intSize
@ -2408,8 +2415,12 @@ func (c *typeConv) loadType(dtype dwarf.Type, pos token.Pos, parent string) *Typ
break
}
if tag == "" {
tag = anonymousStructTag[dt]
if tag == "" {
tag = "__" + strconv.Itoa(tagGen)
tagGen++
anonymousStructTag[dt] = tag
}
} else if t.C.Empty() {
t.C.Set(dt.Kind + " " + tag)
}
@ -3006,8 +3017,9 @@ func (c *typeConv) anonymousStructTypedef(dt *dwarf.TypedefType) bool {
return ok && st.StructName == ""
}
// badPointerTypedef reports whether dt is a C typedef that should not be
// considered a pointer in Go. A typedef is bad if C code sometimes stores
// non-pointers in this type.
// TODO: Currently our best solution is to find these manually and list them as
// they come up. A better solution is desired.
func (c *typeConv) badPointerTypedef(dt *dwarf.TypedefType) bool {

View file

@ -195,30 +195,58 @@ directive can skip over a directive like any other comment.
// Line directives typically appear in machine-generated code, so that compilers and debuggers
// will report positions in the original input to the generator.
/*
The line directive is a historical special case; all other directives are of the form
//go:name, indicating that they are defined by the Go toolchain.
Each directive must be placed on its own line, with only leading spaces and tabs
allowed before the comment.
Each directive applies to the Go code that immediately follows it,
which typically must be a declaration.

//go:noescape

The //go:noescape directive must be followed by a function declaration without
a body (meaning that the function has an implementation not written in Go).
It specifies that the function does not allow any of the pointers passed as
arguments to escape into the heap or into the values returned from the function.
This information can be used during the compiler's escape analysis of Go code
calling the function.

//go:uintptrescapes

The //go:uintptrescapes directive must be followed by a function declaration.
It specifies that the function's uintptr arguments may be pointer values
that have been converted to uintptr and must be treated as such by the
garbage collector. The conversion from pointer to uintptr must appear in
the argument list of any call to this function. This directive is necessary
for some low-level system call implementations and should be avoided otherwise.

//go:noinline

The //go:noinline directive must be followed by a function declaration.
It specifies that calls to the function should not be inlined, overriding
the compiler's usual optimization rules. This is typically only needed
for special runtime functions or when debugging the compiler.

//go:norace

The //go:norace directive must be followed by a function declaration.
It specifies that the function's memory accesses must be ignored by the
race detector. This is most commonly used in low-level code invoked
at times when it is unsafe to call into the race detector runtime.

//go:nosplit

The //go:nosplit directive must be followed by a function declaration.
It specifies that the function must omit its usual stack overflow check.
This is most commonly used by low-level runtime code invoked
at times when it is unsafe for the calling goroutine to be preempted.

//go:linkname localname [importpath.name]

This special directive does not apply to the Go code that follows it.
Instead, the //go:linkname directive instructs the compiler to use
``importpath.name'' as the object file symbol name for the variable or
function declared as ``localname'' in the source code.
If the ``importpath.name'' argument is omitted, the directive uses the
symbol's default object file symbol name and only has the effect of making
the symbol accessible to other packages.
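
For example, a minimal sketch of how two of these directives are placed in
source (hypothetical package and function names; a //go:noescape function
additionally needs an assembly implementation, and a package declaring
bodyless functions needs at least an empty .s file so the compiler accepts
them):

package clock

import (
	_ "unsafe" // required for //go:linkname
)

// Implemented in assembly; the pointer argument does not escape.
//go:noescape
func fill(p *[16]byte)

// Bind the local name nanotime to the runtime's monotonic clock.
//go:linkname nanotime runtime.nanotime
func nanotime() int64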

View file

@ -112,6 +112,7 @@ var knownFormats = map[string]string{
"cmd/compile/internal/ssa.Location %s": "", "cmd/compile/internal/ssa.Location %s": "",
"cmd/compile/internal/ssa.Op %s": "", "cmd/compile/internal/ssa.Op %s": "",
"cmd/compile/internal/ssa.Op %v": "", "cmd/compile/internal/ssa.Op %v": "",
"cmd/compile/internal/ssa.Sym %v": "",
"cmd/compile/internal/ssa.ValAndOff %s": "", "cmd/compile/internal/ssa.ValAndOff %s": "",
"cmd/compile/internal/ssa.domain %v": "", "cmd/compile/internal/ssa.domain %v": "",
"cmd/compile/internal/ssa.posetNode %v": "", "cmd/compile/internal/ssa.posetNode %v": "",
@ -156,7 +157,6 @@ var knownFormats = map[string]string{
"int64 %+d": "", "int64 %+d": "",
"int64 %-10d": "", "int64 %-10d": "",
"int64 %.5d": "", "int64 %.5d": "",
"int64 %X": "",
"int64 %d": "", "int64 %d": "",
"int64 %v": "", "int64 %v": "",
"int64 %x": "", "int64 %x": "",

View file

@ -752,7 +752,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
@ -796,6 +801,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off)
case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
@ -804,11 +810,29 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, sc.Off())
case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
sc := v.AuxValAndOff()
p.From.Offset = sc.Val()
switch {
case p.As == x86.AADDQ && p.From.Offset == 1:
p.As = x86.AINCQ
p.From.Type = obj.TYPE_NONE
case p.As == x86.AADDQ && p.From.Offset == -1:
p.As = x86.ADECQ
p.From.Type = obj.TYPE_NONE
case p.As == x86.AADDL && p.From.Offset == 1:
p.As = x86.AINCL
p.From.Type = obj.TYPE_NONE
case p.As == x86.AADDL && p.From.Offset == -1:
p.As = x86.ADECL
p.From.Type = obj.TYPE_NONE
}
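(For constant operands of plus or minus one, this added switch rewrites the read-modify-write add into INCQ/DECQ or INCL/DECL and drops the immediate operand, which saves a byte in the instruction encoding.)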
memIdx(&p.To, v)
gc.AddAux2(&p.To, v, sc.Off())
case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
@ -840,6 +864,28 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
if v.Reg() != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8:
p := s.Prog(v.Op.Asm())
r, i := v.Args[1].Reg(), v.Args[2].Reg()
p.From.Type = obj.TYPE_MEM
p.From.Scale = v.Op.Scale()
if p.From.Scale == 1 && i == x86.REG_SP {
r, i = i, r
}
p.From.Reg = r
p.From.Index = i
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
@ -872,7 +918,16 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p := s.Prog(obj.ADUFFCOPY)
p.To.Type = obj.TYPE_ADDR
p.To.Sym = gc.Duffcopy
if v.AuxInt%16 != 0 {
v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
}
p.To.Offset = 14 * (64 - v.AuxInt/16)
// 14 and 64 are magic constants. 14 is the number of bytes to encode:
// MOVUPS (SI), X0
// ADDQ $16, SI
// MOVUPS X0, (DI)
// ADDQ $16, DI
// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
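// As a concrete check of the formula: a 32-byte copy has AuxInt = 32, so the
// offset is 14 * (64 - 32/16) = 14 * 62 = 868, which enters duffcopy two
// 14-byte blocks from the end, and exactly two MOVUPS/ADDQ blocks execute.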
case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
if v.Type.IsMemory() {
@ -902,6 +957,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
gc.AddrAuto(&p.To, v)
case ssa.OpAMD64LoweredHasCPUFeature:
p := s.Prog(x86.AMOVBQZX)
p.From.Type = obj.TYPE_MEM
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpAMD64LoweredGetClosurePtr:
// Closure pointer is DX.
gc.CheckLoweredGetClosurePtr(v)
@ -1095,7 +1156,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Reg = x86.REG_AX
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v)
if logopt.Enabled() {
logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
}

View file

@ -1083,7 +1083,7 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
s.Br(obj.AJMP, b.Succs[0].Block())
}
}
p.From.Offset = b.AuxInt
p.From.Type = obj.TYPE_CONST
p.Reg = b.Controls[0].Reg()

View file

@ -8,12 +8,15 @@ import (
"cmd/compile/internal/types" "cmd/compile/internal/types"
"cmd/internal/obj" "cmd/internal/obj"
"fmt" "fmt"
"sort"
) )
// AlgKind describes the kind of algorithms used for comparing and // AlgKind describes the kind of algorithms used for comparing and
// hashing a Type. // hashing a Type.
type AlgKind int type AlgKind int
//go:generate stringer -type AlgKind -trimprefix A
const (
// These values are known by runtime.
ANOEQ AlgKind = iota
@ -502,7 +505,7 @@ func geneq(t *types.Type) *obj.LSym {
namedfield("p", types.NewPtr(t)), namedfield("p", types.NewPtr(t)),
namedfield("q", types.NewPtr(t)), namedfield("q", types.NewPtr(t)),
) )
tfn.Rlist.Set1(anonfield(types.Types[TBOOL])) tfn.Rlist.Set1(namedfield("r", types.Types[TBOOL]))
fn := dclfunc(sym, tfn) fn := dclfunc(sym, tfn)
np := asNode(tfn.Type.Params().Field(0).Nname) np := asNode(tfn.Type.Params().Field(0).Nname)
@ -516,48 +519,137 @@ func geneq(t *types.Type) *obj.LSym {
Fatalf("geneq %v", t) Fatalf("geneq %v", t)
case TARRAY: case TARRAY:
nelem := t.NumElem()
// checkAll generates code to check the equality of all array elements.
// If unroll is greater than nelem, checkAll generates:
//
// if eq(p[0], q[0]) && eq(p[1], q[1]) && ... {
// } else {
//   return
// }
//
// And so on.
//
// Otherwise it generates:
//
// for i := 0; i < nelem; i++ {
//   if eq(p[i], q[i]) {
//   } else {
//     return
//   }
// }
//
// TODO(josharian): consider doing some loop unrolling
// for larger nelem as well, processing a few elements at a time in a loop.
checkAll := func(unroll int64, eq func(pi, qi *Node) *Node) {
// checkIdx generates a node to check for equality at index i.
checkIdx := func(i *Node) *Node {
// pi := p[i]
pi := nod(OINDEX, np, i)
pi.SetBounded(true)
pi.Type = t.Elem()
// qi := q[i]
qi := nod(OINDEX, nq, i)
qi.SetBounded(true)
qi.Type = t.Elem()
return eq(pi, qi)
}
if nelem <= unroll {
// Generate a series of checks.
var cond *Node
for i := int64(0); i < nelem; i++ {
c := nodintconst(i)
check := checkIdx(c)
if cond == nil {
cond = check
continue
}
cond = nod(OANDAND, cond, check)
}
nif := nod(OIF, cond, nil)
nif.Rlist.Append(nod(ORETURN, nil, nil))
fn.Nbody.Append(nif)
return
}
// Generate a for loop.
// for i := 0; i < nelem; i++
i := temp(types.Types[TINT])
init := nod(OAS, i, nodintconst(0))
cond := nod(OLT, i, nodintconst(nelem))
post := nod(OAS, i, nod(OADD, i, nodintconst(1)))
loop := nod(OFOR, cond, post)
loop.Ninit.Append(init)
// if eq(pi, qi) {} else { return }
check := checkIdx(i)
nif := nod(OIF, check, nil)
nif.Rlist.Append(nod(ORETURN, nil, nil))
loop.Nbody.Append(nif)
fn.Nbody.Append(loop)
}
switch t.Elem().Etype {
case TINTER:
// Do two loops. First, check that all the types match (cheap).
// Second, check that all the data match (expensive).
// TODO: when the array size is small, unroll the tab match checks.
checkAll(3, func(pi, qi *Node) *Node {
// Compare types.
pi = typecheck(pi, ctxExpr)
qi = typecheck(qi, ctxExpr)
eqtab, _ := eqinterface(pi, qi)
return eqtab
})
checkAll(1, func(pi, qi *Node) *Node {
// Compare data.
pi = typecheck(pi, ctxExpr)
qi = typecheck(qi, ctxExpr)
_, eqdata := eqinterface(pi, qi)
return eqdata
})
case TSTRING:
// Do two loops. First, check that all the lengths match (cheap).
// Second, check that all the contents match (expensive).
// TODO: when the array size is small, unroll the length match checks.
checkAll(3, func(pi, qi *Node) *Node {
// Compare lengths.
eqlen, _ := eqstring(pi, qi)
return eqlen
})
checkAll(1, func(pi, qi *Node) *Node {
// Compare contents.
_, eqmem := eqstring(pi, qi)
return eqmem
})
case TFLOAT32, TFLOAT64:
checkAll(2, func(pi, qi *Node) *Node {
// p[i] == q[i]
return nod(OEQ, pi, qi)
})
// TODO: pick apart structs, do them piecemeal too
default:
checkAll(1, func(pi, qi *Node) *Node {
// p[i] == q[i]
return nod(OEQ, pi, qi)
})
}
// return true
ret := nod(ORETURN, nil, nil)
ret.List.Append(nodbool(true))
fn.Nbody.Append(ret)
case TSTRUCT:
// Build a list of conditions to satisfy.
// Track their order so that we can preserve aspects of that order.
type nodeIdx struct {
n *Node
idx int
}
var conds []nodeIdx
and := func(n *Node) {
conds = append(conds, nodeIdx{n: n, idx: len(conds)})
}
// Walk the struct using memequal for runs of AMEM
@ -573,7 +665,24 @@ func geneq(t *types.Type) *obj.LSym {
// Compare non-memory fields with field equality.
if !IsRegularMemory(f.Type) {
p := nodSym(OXDOT, np, f.Sym)
q := nodSym(OXDOT, nq, f.Sym)
switch {
case f.Type.IsString():
eqlen, eqmem := eqstring(p, q)
and(eqlen)
and(eqmem)
case f.Type.IsInterface():
p.Type = f.Type
p = typecheck(p, ctxExpr)
q.Type = f.Type
q = typecheck(q, ctxExpr)
eqtab, eqdata := eqinterface(p, q)
and(eqtab)
and(eqdata)
default:
and(nod(OEQ, p, q))
}
i++
continue
}
@ -595,8 +704,24 @@ func geneq(t *types.Type) *obj.LSym {
i = next
}
// Sort conditions to put runtime calls last.
// Preserve the rest of the ordering.
sort.SliceStable(conds, func(i, j int) bool {
x, y := conds[i], conds[j]
if (x.n.Op != OCALL) == (y.n.Op != OCALL) {
return x.idx < y.idx
}
return x.n.Op != OCALL
})
var cond *Node
if len(conds) == 0 {
cond = nodbool(true)
} else {
cond = conds[0].n
for _, c := range conds[1:] {
cond = nod(OANDAND, cond, c.n)
}
}
ret := nod(ORETURN, nil, nil)
@ -643,6 +768,70 @@ func eqfield(p *Node, q *Node, field *types.Sym) *Node {
return ne
}
// eqstring returns the nodes
// len(s) == len(t)
// and
// memequal(s.ptr, t.ptr, len(s))
// which can be used to construct string equality comparison.
// eqlen must be evaluated before eqmem, and shortcircuiting is required.
func eqstring(s, t *Node) (eqlen, eqmem *Node) {
s = conv(s, types.Types[TSTRING])
t = conv(t, types.Types[TSTRING])
sptr := nod(OSPTR, s, nil)
tptr := nod(OSPTR, t, nil)
slen := conv(nod(OLEN, s, nil), types.Types[TUINTPTR])
tlen := conv(nod(OLEN, t, nil), types.Types[TUINTPTR])
fn := syslook("memequal")
fn = substArgTypes(fn, types.Types[TUINT8], types.Types[TUINT8])
call := nod(OCALL, fn, nil)
call.List.Append(sptr, tptr, slen.copy())
call = typecheck(call, ctxExpr|ctxMultiOK)
cmp := nod(OEQ, slen, tlen)
cmp = typecheck(cmp, ctxExpr)
cmp.Type = types.Types[TBOOL]
return cmp, call
}
// eqinterface returns the nodes
// s.tab == t.tab (or s.typ == t.typ, as appropriate)
// and
// ifaceeq(s.tab, s.data, t.data) (or efaceeq(s.typ, s.data, t.data), as appropriate)
// which can be used to construct interface equality comparison.
// eqtab must be evaluated before eqdata, and shortcircuiting is required.
func eqinterface(s, t *Node) (eqtab, eqdata *Node) {
if !types.Identical(s.Type, t.Type) {
Fatalf("eqinterface %v %v", s.Type, t.Type)
}
// func ifaceeq(tab *uintptr, x, y unsafe.Pointer) (ret bool)
// func efaceeq(typ *uintptr, x, y unsafe.Pointer) (ret bool)
var fn *Node
if s.Type.IsEmptyInterface() {
fn = syslook("efaceeq")
} else {
fn = syslook("ifaceeq")
}
stab := nod(OITAB, s, nil)
ttab := nod(OITAB, t, nil)
sdata := nod(OIDATA, s, nil)
tdata := nod(OIDATA, t, nil)
sdata.Type = types.Types[TUNSAFEPTR]
tdata.Type = types.Types[TUNSAFEPTR]
sdata.SetTypecheck(1)
tdata.SetTypecheck(1)
call := nod(OCALL, fn, nil)
call.List.Append(stab, sdata, tdata)
call = typecheck(call, ctxExpr|ctxMultiOK)
cmp := nod(OEQ, stab, ttab)
cmp = typecheck(cmp, ctxExpr)
cmp.Type = types.Types[TBOOL]
return cmp, call
}
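In ordinary Go terms, the pair returned by eqstring corresponds to a
comparison of the following shape (a standalone sketch; memequal here is a
plain byte loop standing in for the runtime routine, which really takes raw
pointers and a length):

package main

import "fmt"

// memequal stands in for runtime.memequal: report whether two
// equally sized chunks of memory hold the same bytes.
func memequal(a, b []byte) bool {
	for i := range a {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}

// stringEq mirrors the code geneq emits for a string comparison:
// the cheap length test (eqlen) runs first, and && supplies the
// short-circuiting required before the memory comparison (eqmem).
func stringEq(s, t string) bool {
	return len(s) == len(t) && memequal([]byte(s), []byte(t))
}

func main() {
	fmt.Println(stringEq("go", "go"), stringEq("go", "gc")) // true false
}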
// eqmem returns the node
// memequal(&p.field, &q.field [, size])
func eqmem(p *Node, q *Node, field *types.Sym, size int64) *Node {

View file

@ -0,0 +1,48 @@
// Code generated by "stringer -type AlgKind -trimprefix A"; DO NOT EDIT.
package gc
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[ANOEQ-0]
_ = x[AMEM0-1]
_ = x[AMEM8-2]
_ = x[AMEM16-3]
_ = x[AMEM32-4]
_ = x[AMEM64-5]
_ = x[AMEM128-6]
_ = x[ASTRING-7]
_ = x[AINTER-8]
_ = x[ANILINTER-9]
_ = x[AFLOAT32-10]
_ = x[AFLOAT64-11]
_ = x[ACPLX64-12]
_ = x[ACPLX128-13]
_ = x[AMEM-100]
_ = x[ASPECIAL - -1]
}
const (
_AlgKind_name_0 = "SPECIALNOEQMEM0MEM8MEM16MEM32MEM64MEM128STRINGINTERNILINTERFLOAT32FLOAT64CPLX64CPLX128"
_AlgKind_name_1 = "MEM"
)
var (
_AlgKind_index_0 = [...]uint8{0, 7, 11, 15, 19, 24, 29, 34, 40, 46, 51, 59, 66, 73, 79, 86}
)
func (i AlgKind) String() string {
switch {
case -1 <= i && i <= 13:
i -= -1
return _AlgKind_name_0[_AlgKind_index_0[i]:_AlgKind_index_0[i+1]]
case i == 100:
return _AlgKind_name_1
default:
return "AlgKind(" + strconv.FormatInt(int64(i), 10) + ")"
}
}
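For instance, AMEM8 (value 2) maps through _AlgKind_index_0 to _AlgKind_name_0[15:19] and prints as "MEM8", while a value outside both ranges, such as AlgKind(42), falls through to the default case and prints as "AlgKind(42)".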

View file

@ -319,10 +319,10 @@ func dowidth(t *types.Type) {
Fatalf("dowidth any") Fatalf("dowidth any")
case TSTRING: case TSTRING:
if sizeof_String == 0 { if sizeofString == 0 {
Fatalf("early dowidth string") Fatalf("early dowidth string")
} }
w = int64(sizeof_String) w = sizeofString
t.Align = uint8(Widthptr) t.Align = uint8(Widthptr)
case TARRAY: case TARRAY:
@ -344,7 +344,7 @@ func dowidth(t *types.Type) {
if t.Elem() == nil {
break
}
w = sizeofSlice
checkwidth(t.Elem())
t.Align = uint8(Widthptr)

View file

@ -0,0 +1,40 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gc
import "testing"
var globl int64
func BenchmarkLoadAdd(b *testing.B) {
x := make([]int64, 1024)
y := make([]int64, 1024)
for i := 0; i < b.N; i++ {
var s int64
for i := range x {
s ^= x[i] + y[i]
}
globl = s
}
}
func BenchmarkModify(b *testing.B) {
a := make([]int64, 1024)
v := globl
for i := 0; i < b.N; i++ {
for j := range a {
a[j] += v
}
}
}
func BenchmarkConstModify(b *testing.B) {
a := make([]int64, 1024)
for i := 0; i < b.N; i++ {
for j := range a {
a[j] += 3
}
}
}
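Assuming the file lives alongside the rest of cmd/compile/internal/gc, these benchmarks run with the standard tooling, e.g. go test -run=NONE -bench=. cmd/compile/internal/gc, and are meant to exercise the new load-op and read-modify-write instruction selection above.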

View file

@ -57,9 +57,9 @@ var runtimeDecls = [...]struct {
{"concatstrings", funcTag, 35}, {"concatstrings", funcTag, 35},
{"cmpstring", funcTag, 36}, {"cmpstring", funcTag, 36},
{"intstring", funcTag, 39}, {"intstring", funcTag, 39},
{"slicebytetostring", funcTag, 41}, {"slicebytetostring", funcTag, 40},
{"slicebytetostringtmp", funcTag, 42}, {"slicebytetostringtmp", funcTag, 41},
{"slicerunetostring", funcTag, 45}, {"slicerunetostring", funcTag, 44},
{"stringtoslicebyte", funcTag, 46}, {"stringtoslicebyte", funcTag, 46},
{"stringtoslicerune", funcTag, 49}, {"stringtoslicerune", funcTag, 49},
{"slicecopy", funcTag, 51}, {"slicecopy", funcTag, 51},
@ -241,20 +241,20 @@ func runtimeTypes() []*types.Type {
typs[37] = types.NewArray(typs[0], 4)
typs[38] = types.NewPtr(typs[37])
typs[39] = functype(nil, []*Node{anonfield(typs[38]), anonfield(typs[19])}, []*Node{anonfield(typs[25])})
typs[40] = functype(nil, []*Node{anonfield(typs[29]), anonfield(typs[1]), anonfield(typs[11])}, []*Node{anonfield(typs[25])})
typs[41] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[11])}, []*Node{anonfield(typs[25])})
typs[42] = types.Runetype
typs[43] = types.NewSlice(typs[42])
typs[44] = functype(nil, []*Node{anonfield(typs[29]), anonfield(typs[43])}, []*Node{anonfield(typs[25])})
typs[45] = types.NewSlice(typs[0])
typs[46] = functype(nil, []*Node{anonfield(typs[29]), anonfield(typs[25])}, []*Node{anonfield(typs[45])})
typs[47] = types.NewArray(typs[42], 32)
typs[48] = types.NewPtr(typs[47])
typs[49] = functype(nil, []*Node{anonfield(typs[48]), anonfield(typs[25])}, []*Node{anonfield(typs[43])})
typs[50] = types.Types[TUINTPTR]
typs[51] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[11]), anonfield(typs[3]), anonfield(typs[11]), anonfield(typs[50])}, []*Node{anonfield(typs[11])})
typs[52] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[11]), anonfield(typs[25])}, []*Node{anonfield(typs[11])})
typs[53] = functype(nil, []*Node{anonfield(typs[25]), anonfield(typs[11])}, []*Node{anonfield(typs[42]), anonfield(typs[11])})
typs[54] = functype(nil, []*Node{anonfield(typs[25])}, []*Node{anonfield(typs[11])})
typs[55] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2])})
typs[56] = types.Types[TUNSAFEPTR]
@ -293,7 +293,7 @@ func runtimeTypes() []*types.Type {
typs[89] = tostruct([]*Node{namedfield("enabled", typs[15]), namedfield("pad", typs[88]), namedfield("needed", typs[15]), namedfield("cgo", typs[15]), namedfield("alignme", typs[21])})
typs[90] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[3])}, nil)
typs[91] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, nil)
typs[92] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[11]), anonfield(typs[3]), anonfield(typs[11])}, []*Node{anonfield(typs[11])})
typs[93] = functype(nil, []*Node{anonfield(typs[86]), anonfield(typs[3])}, []*Node{anonfield(typs[15])})
typs[94] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[83])}, []*Node{anonfield(typs[15])})
typs[95] = types.NewPtr(typs[15])

View file

@ -69,13 +69,13 @@ func concatstrings(*[32]byte, []string) string
func cmpstring(string, string) int
func intstring(*[4]byte, int64) string
func slicebytetostring(buf *[32]byte, ptr *byte, n int) string
func slicebytetostringtmp(ptr *byte, n int) string
func slicerunetostring(*[32]byte, []rune) string
func stringtoslicebyte(*[32]byte, string) []byte
func stringtoslicerune(*[32]rune, string) []rune
func slicecopy(toPtr *any, toLen int, frPtr *any, frLen int, wid uintptr) int
func slicestringcopy(toPtr *byte, toLen int, fr string) int
func decoderune(string, int) (retv rune, retk int)
func countrunes(string) int
@ -162,7 +162,7 @@ var writeBarrier struct {
// *byte is really *runtime.Type
func typedmemmove(typ *byte, dst *any, src *any)
func typedmemclr(typ *byte, dst *any)
func typedslicecopy(typ *byte, dstPtr *any, dstLen int, srcPtr *any, srcLen int) int
func selectnbsend(hchan chan<- any, elem *any) bool
func selectnbrecv(elem *any, hchan <-chan any) bool

View file

@ -4,6 +4,10 @@
package gc
import (
"math/bits"
)
const (
wordBits = 32
wordMask = wordBits - 1
@ -108,30 +112,11 @@ func (bv bvec) Next(i int32) int32 {
// Find 1 bit.
w := bv.b[i>>wordShift] >> uint(i&wordMask)
i += int32(bits.TrailingZeros32(w))
for w&1 == 0 {
w >>= 1
i++
}
return i
}
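The rewrite replaces the shift-and-test loop with a single math/bits call; a
standalone illustration of the replaced step:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// w is the bitmap word already shifted so that bit 0 corresponds
	// to index i. The old loop advanced one bit per iteration;
	// TrailingZeros32 locates the first set bit directly (and returns
	// 32 for a zero word).
	w := uint32(0x14) // binary 10100
	fmt.Println(bits.TrailingZeros32(w)) // prints 2
}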
// Len returns the minimum number of bits required to represent bv.
// The result is 0 if no bits are set in bv.
func (bv bvec) Len() int32 {
for wi := len(bv.b) - 1; wi >= 0; wi-- {
if w := bv.b[wi]; w != 0 {
for i := wordBits - 1; i >= 0; i-- {
if w>>uint(i) != 0 {
return int32(wi)*wordBits + int32(i) + 1
}
}
}
}
return 0
}
func (bv bvec) IsEmpty() bool {
for _, x := range bv.b {
if x != 0 {

View file

@ -566,3 +566,20 @@ func walkpartialcall(n *Node, init *Nodes) *Node {
return walkexpr(clos, init)
}
// callpartMethod returns the *types.Field representing the method
// referenced by method value n.
func callpartMethod(n *Node) *types.Field {
if n.Op != OCALLPART {
Fatalf("expected OCALLPART, got %v", n)
}
// TODO(mdempsky): Optimize this. If necessary,
// makepartialcall could save m for us somewhere.
var m *types.Field
if lookdot0(n.Right.Sym, n.Left.Type, &m, false) != 1 {
Fatalf("failed to find field for OCALLPART")
}
return m
}

View file

@ -590,14 +590,6 @@ func checkdupfields(what string, fss ...[]*types.Field) {
// a type for struct/interface/arglist
func tostruct(l []*Node) *types.Type {
t := types.New(TSTRUCT)
tostruct0(t, l)
return t
}
func tostruct0(t *types.Type, l []*Node) {
if t == nil || !t.IsStruct() {
Fatalf("struct expected")
}
fields := make([]*types.Field, len(l))
for i, n := range l {
@ -614,6 +606,8 @@ func tostruct0(t *types.Type, l []*Node) {
if !t.Broke() {
checkwidth(t)
}
return t
}
func tofunargs(l []*Node, funarg types.Funarg) *types.Type {
@ -684,15 +678,6 @@ func tointerface(l []*Node) *types.Type {
return types.Types[TINTER]
}
t := types.New(TINTER)
tointerface0(t, l)
return t
}
func tointerface0(t *types.Type, l []*Node) {
if t == nil || !t.IsInterface() {
Fatalf("interface expected")
}
var fields []*types.Field
for _, n := range l {
f := interfacefield(n)
@ -702,6 +687,7 @@ func tointerface0(t *types.Type, l []*Node) {
fields = append(fields, f)
}
t.SetInterface(fields)
return t
}
func fakeRecv() *Node {
@ -724,14 +710,6 @@ func isifacemethod(f *types.Type) bool {
// turn a parsed function declaration into a type
func functype(this *Node, in, out []*Node) *types.Type {
t := types.New(TFUNC)
functype0(t, this, in, out)
return t
}
func functype0(t *types.Type, this *Node, in, out []*Node) {
if t == nil || t.Etype != TFUNC {
Fatalf("function type expected")
}
var rcvr []*Node
if this != nil {
@ -748,15 +726,13 @@ func functype0(t *types.Type, this *Node, in, out []*Node) {
}
t.FuncType().Outnamed = t.NumResults() > 0 && origSym(t.Results().Field(0).Sym) != nil
return t
}
func functypefield(this *types.Field, in, out []*types.Field) *types.Type {
t := types.New(TFUNC)
functypefield0(t, this, in, out)
return t
}
func functypefield0(t *types.Type, this *types.Field, in, out []*types.Field) {
var rcvr []*types.Field
if this != nil {
rcvr = []*types.Field{this}
@ -766,6 +742,8 @@ func functypefield0(t *types.Type, this *types.Field, in, out []*types.Field) {
t.FuncType().Results = tofunargsfield(out, types.FunargResults)
t.FuncType().Outnamed = t.NumResults() > 0 && origSym(t.Results().Field(0).Sym) != nil
return t
}
// origSym returns the original symbol written by the user.

View file

@ -7,6 +7,7 @@ package gc
import (
"cmd/compile/internal/logopt"
"cmd/compile/internal/types"
"cmd/internal/src"
"fmt"
"math"
"strings"
@ -427,7 +428,12 @@ func (e *Escape) exprSkipInit(k EscHole, n *Node) {
lineno = lno
}()
uintptrEscapesHack := k.uintptrEscapesHack
k.uintptrEscapesHack = false
if uintptrEscapesHack && n.Op == OCONVNOP && n.Left.Type.IsUnsafePtr() {
// nop
} else if k.derefs >= 0 && !types.Haspointers(n.Type) {
k = e.discardHole()
}
@ -521,10 +527,26 @@ func (e *Escape) exprSkipInit(k EscHole, n *Node) {
// nop
case OCALLPART:
// Flow the receiver argument to both the closure and
// to the receiver parameter.
closureK := e.spill(k, n)
m := callpartMethod(n)
// We don't know how the method value will be called
// later, so conservatively assume the result
// parameters all flow to the heap.
//
// TODO(mdempsky): Change ks into a callback, so that
// we don't have to create this dummy slice?
var ks []EscHole
for i := m.Type.NumResults(); i > 0; i-- {
ks = append(ks, e.heapHole())
}
paramK := e.tagHole(ks, asNode(m.Type.Nname()), m.Type.Recv())
e.expr(e.teeHole(paramK, closureK), n.Left)
case OPTRLIT:
e.expr(e.spill(k, n), n.Left)
@ -539,6 +561,7 @@ func (e *Escape) exprSkipInit(k EscHole, n *Node) {
case OSLICELIT:
k = e.spill(k, n)
k.uintptrEscapesHack = uintptrEscapesHack // for ...uintptr parameters
for _, elt := range n.List.Slice() {
if elt.Op == OKEY {
@ -717,197 +740,140 @@ func (e *Escape) assignHeap(src *Node, why string, where *Node) {
// should contain the holes representing where the function callee's
// results flows; where is the OGO/ODEFER context of the call, if any.
func (e *Escape) call(ks []EscHole, call, where *Node) {
topLevelDefer := where != nil && where.Op == ODEFER && e.loopDepth == 1
if topLevelDefer {
// force stack allocation of defer record, unless
// open-coded defers are used (see ssa.go)
where.Esc = EscNever
}
argument := func(k EscHole, arg *Node) {
if topLevelDefer {
// Top level defers arguments don't escape to
// heap, but they do need to last until end of
// function.
k = e.later(k)
} else if where != nil {
k = e.heapHole()
}
e.expr(k.note(call, "call parameter"), arg)
}
switch call.Op {
default:
Fatalf("unexpected call op: %v", call.Op)
case OCALLFUNC, OCALLMETH, OCALLINTER:
fixVariadicCall(call)
// Pick out the function callee, if statically known.
var fn *Node
switch call.Op {
case OCALLFUNC:
if call.Left.Op == ONAME && call.Left.Class() == PFUNC {
fn = call.Left
} else if call.Left.Op == OCLOSURE {
fn = call.Left.Func.Closure.Func.Nname
}
case OCALLMETH:
fn = asNode(call.Left.Type.FuncType().Nname)
}
fntype := call.Left.Type
if fn != nil {
fntype = fn.Type
}
if ks != nil && fn != nil && e.inMutualBatch(fn) {
for i, result := range fn.Type.Results().FieldSlice() {
e.expr(ks[i], asNode(result.Nname))
}
}
if r := fntype.Recv(); r != nil {
argument(e.tagHole(ks, fn, r), call.Left.Left)
} else {
// Evaluate callee function expression.
argument(e.discardHole(), call.Left)
}
args := call.List.Slice()
for i, param := range fntype.Params().FieldSlice() {
argument(e.tagHole(ks, fn, param), args[i])
}
case OAPPEND:
args := call.List.Slice()
// Appendee slice may flow directly to the result, if
// it has enough capacity. Alternatively, a new heap
// slice might be allocated, and all slice elements
// might flow to heap.
appendeeK := ks[0]
if types.Haspointers(args[0].Type.Elem()) {
appendeeK = e.teeHole(appendeeK, e.heapHole().deref(call, "appendee slice"))
}
argument(appendeeK, args[0])
if call.IsDDD() {
appendedK := e.discardHole()
if args[1].Type.IsSlice() && types.Haspointers(args[1].Type.Elem()) {
appendedK = e.heapHole().deref(call, "appended slice...")
}
argument(appendedK, args[1])
} else {
for _, arg := range args[1:] {
argument(e.heapHole(), arg)
}
} else {
for i := 1; i < len(args); i++ {
paramKs[i] = e.heapHole()
}
}
case OCOPY:
if call.Right.Type.IsSlice() && types.Haspointers(call.Right.Type.Elem()) {
paramKs[1] = e.teeHole(paramKs[1], e.heapHole().deref(call, "copied slice"))
}
case OPANIC:
paramKs[0] = e.heapHole()
}
}
if call.Op == OCALLFUNC {
// Evaluate callee function expression.
e.expr(e.augmentParamHole(e.discardHole(), call, where), call.Left)
}
if recv != nil {
// TODO(mdempsky): Handle go:uintptrescapes here too?
e.expr(e.augmentParamHole(recvK, call, where), recv)
}
// Apply augmentParamHole before ODDDARG so that it affects
// the implicit slice allocation for variadic calls, if any.
for i, paramK := range paramKs {
paramKs[i] = e.augmentParamHole(paramK, call, where)
}
// TODO(mdempsky): Remove after early ddd-ification.
if fntype != nil && fntype.IsVariadic() && !call.IsDDD() {
vi := fntype.NumParams() - 1
elt := fntype.Params().Field(vi).Type.Elem()
nva := call.List.Len()
nva -= vi
// Introduce ODDDARG node to represent ... allocation.
ddd := nodl(call.Pos, ODDDARG, nil, nil)
ddd.Type = types.NewPtr(types.NewArray(elt, int64(nva)))
call.Right = ddd
dddK := e.spill(paramKs[vi], ddd)
paramKs = paramKs[:vi]
for i := 0; i < nva; i++ {
paramKs = append(paramKs, dddK)
}
}
for i, arg := range args {
// For arguments to go:uintptrescapes, peel
// away an unsafe.Pointer->uintptr conversion,
// if present.
if static && arg.Op == OCONVNOP && arg.Type.Etype == TUINTPTR && arg.Left.Type.Etype == TUNSAFEPTR {
x := i
if fntype.IsVariadic() && x >= fntype.NumParams() {
x = fntype.NumParams() - 1
}
if fntype.Params().Field(x).Note == uintptrEscapesTag {
arg = arg.Left
} }
} }
// no augmentParamHole here; handled in loop before ODDDARG case OCOPY:
e.expr(paramKs[i], arg) argument(e.discardHole(), call.Left)
}
}
// augmentParamHole augments parameter holes as necessary for use in copiedK := e.discardHole()
// go/defer statements. if call.Right.Type.IsSlice() && types.Haspointers(call.Right.Type.Elem()) {
func (e *Escape) augmentParamHole(k EscHole, call, where *Node) EscHole { copiedK = e.heapHole().deref(call, "copied slice")
k = k.note(call, "call parameter") }
if where == nil { argument(copiedK, call.Right)
return k
}
// Top level defers arguments don't escape to heap, but they case OPANIC:
// do need to last until end of function. Tee with a argument(e.heapHole(), call.Left)
// non-transient location to avoid arguments from being
// transiently allocated.
if where.Op == ODEFER && e.loopDepth == 1 {
// force stack allocation of defer record, unless open-coded
// defers are used (see ssa.go)
where.Esc = EscNever
return e.later(k)
}
return e.heapHole().note(where, "call parameter") case OCOMPLEX:
argument(e.discardHole(), call.Left)
argument(e.discardHole(), call.Right)
case ODELETE, OPRINT, OPRINTN, ORECOVER:
for _, arg := range call.List.Slice() {
argument(e.discardHole(), arg)
}
case OLEN, OCAP, OREAL, OIMAG, OCLOSE:
argument(e.discardHole(), call.Left)
}
} }
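The topLevelDefer special case only applies to defers at function depth (loopDepth == 1); a hypothetical sketch of the two situations the argument closure distinguishes (names invented for illustration):

package sketch

import "fmt"

func deferDepths(xs []int) {
	defer fmt.Println("done") // ODEFER at loopDepth 1: topLevelDefer, holes go through e.later

	for _, x := range xs {
		defer fmt.Println(x) // ODEFER inside a loop: arguments are evaluated into heap holes
	}
}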
// tagHole returns a hole for evaluating an argument passed to param.
// ks should contain the holes representing where the function
// callee's results flows. fn is the statically-known callee function,
// if any.
func (e *Escape) tagHole(ks []EscHole, fn *Node, param *types.Field) EscHole {
	// If this is a dynamic call, we can't rely on param.Note.
	if fn == nil {
		return e.heapHole()
	}

	if e.inMutualBatch(fn) {
		return e.addr(asNode(param.Nname))
	}

	// Call to previously tagged function.

	if param.Note == uintptrEscapesTag {
		k := e.heapHole()
		k.uintptrEscapesHack = true
		return k
	}

	var tagKs []EscHole

	esc := ParseLeaks(param.Note)
@@ -926,6 +892,21 @@ func (e *Escape) tagHole(ks []EscHole, param *types.Field, static bool) EscHole {
	return e.teeHole(tagKs...)
}
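For a previously tagged callee, param.Note carries the leak summary that ParseLeaks decodes. In this hypothetical function, for instance, the parameter would be tagged as leaking to the result, so tagHole ties an argument's flow to ks[0] rather than to the heap:

package sketch

// identity's parameter leaks to its result; at a call site q := identity(p),
// the recorded flow is q <- p, not heap <- p.
func identity(p *int) *int { return p }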
// inMutualBatch reports whether function fn is in the batch of
// mutually recursive functions being analyzed. When this is true,
// fn has not yet been analyzed, so its parameters and results
// should be incorporated directly into the flow graph instead of
// relying on its escape analysis tagging.
func (e *Escape) inMutualBatch(fn *Node) bool {
if fn.Name.Defn != nil && fn.Name.Defn.Esc < EscFuncTagged {
if fn.Name.Defn.Esc == EscFuncUnknown {
Fatalf("graph inconsistency")
}
return true
}
return false
}
// An EscHole represents a context for evaluating a Go
// expression. E.g., when evaluating p in "x = **p", we'd have a hole
// with dst==x and derefs==2.
@@ -933,6 +914,10 @@ type EscHole struct {
	dst    *EscLocation
	derefs int // >= -1
	notes  *EscNote

	// uintptrEscapesHack indicates this context is evaluating an
	// argument for a //go:uintptrescapes function.
	uintptrEscapesHack bool
}
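The uintptrEscapesHack flag backs the //go:uintptrescapes directive; a hypothetical use, with all names invented for illustration:

package sketch

import "unsafe"

var last uintptr

//go:uintptrescapes
func record(p uintptr) { last = p }

func use(b *byte) {
	// The directive forces the object behind the converted pointer to
	// escape, so it stays live for the duration of the call.
	record(uintptr(unsafe.Pointer(b)))
}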
type EscNote struct {
@@ -945,7 +930,7 @@ func (k EscHole) note(where *Node, why string) EscHole {
	if where == nil || why == "" {
		Fatalf("note: missing where/why")
	}
	if Debug['m'] >= 2 || logopt.Enabled() {
		k.notes = &EscNote{
			next:  k.notes,
			where: where,
@@ -1092,10 +1077,16 @@ func (e *Escape) flow(k EscHole, src *EscLocation) {
		return
	}
	if dst.escapes && k.derefs < 0 { // dst = &src
		if Debug['m'] >= 2 || logopt.Enabled() {
			pos := linestr(src.n.Pos)
			if Debug['m'] >= 2 {
				fmt.Printf("%s: %v escapes to heap:\n", pos, src.n)
			}
			explanation := e.explainFlow(pos, dst, src, k.derefs, k.notes, []*logopt.LoggedOpt{})
			if logopt.Enabled() {
				logopt.LogOpt(src.n.Pos, "escapes", "escape", e.curfn.funcname(), fmt.Sprintf("%v escapes to heap", src.n), explanation)
			}
		}
		src.escapes = true
		return
@@ -1119,7 +1110,8 @@ func (e *Escape) walkAll() {
	// transient->!transient and !escapes->escapes, which can each
	// happen at most once. So we take Θ(len(e.allLocs)) walks.

	// LIFO queue, has enough room for e.allLocs and e.heapLoc.
	todo := make([]*EscLocation, 0, len(e.allLocs)+1)
	enqueue := func(loc *EscLocation) {
		if !loc.queued {
			todo = append(todo, loc)
@@ -1187,9 +1179,15 @@ func (e *Escape) walkOne(root *EscLocation, walkgen uint32, enqueue func(*EscLocation)) {
			// that value flow for tagging the function
			// later.
			if l.isName(PPARAM) {
				if (logopt.Enabled() || Debug['m'] >= 2) && !l.escapes {
					if Debug['m'] >= 2 {
						fmt.Printf("%s: parameter %v leaks to %s with derefs=%d:\n", linestr(l.n.Pos), l.n, e.explainLoc(root), base)
					}
					explanation := e.explainPath(root, l)
					if logopt.Enabled() {
						logopt.LogOpt(l.n.Pos, "leak", "escape", e.curfn.funcname(),
							fmt.Sprintf("parameter %v leaks to %s with derefs=%d", l.n, e.explainLoc(root), base), explanation)
					}
				}
				l.leakTo(root, base)
			}
@@ -1198,9 +1196,14 @@ func (e *Escape) walkOne(root *EscLocation, walkgen uint32, enqueue func(*EscLocation)) {
			// outlives it, then l needs to be heap
			// allocated.
			if addressOf && !l.escapes {
				if logopt.Enabled() || Debug['m'] >= 2 {
					if Debug['m'] >= 2 {
						fmt.Printf("%s: %v escapes to heap:\n", linestr(l.n.Pos), l.n)
					}
					explanation := e.explainPath(root, l)
					if logopt.Enabled() {
						logopt.LogOpt(l.n.Pos, "escape", "escape", e.curfn.funcname(), fmt.Sprintf("%v escapes to heap", l.n), explanation)
					}
				}
				l.escapes = true
				enqueue(l)
@@ -1225,43 +1228,67 @@ func (e *Escape) walkOne(root *EscLocation, walkgen uint32, enqueue func(*EscLocation)) {
}
// explainPath prints an explanation of how src flows to the walk root.
func (e *Escape) explainPath(root, src *EscLocation) []*logopt.LoggedOpt {
	visited := make(map[*EscLocation]bool)
	pos := linestr(src.n.Pos)
	var explanation []*logopt.LoggedOpt
	for {
		// Prevent infinite loop.
		if visited[src] {
			if Debug['m'] >= 2 {
				fmt.Printf("%s: warning: truncated explanation due to assignment cycle; see golang.org/issue/35518\n", pos)
			}
			break
		}
		visited[src] = true
		dst := src.dst
		edge := &dst.edges[src.dstEdgeIdx]
		if edge.src != src {
			Fatalf("path inconsistency: %v != %v", edge.src, src)
		}

		explanation = e.explainFlow(pos, dst, src, edge.derefs, edge.notes, explanation)

		if dst == root {
			break
		}
		src = dst
	}

	return explanation
}

func (e *Escape) explainFlow(pos string, dst, srcloc *EscLocation, derefs int, notes *EscNote, explanation []*logopt.LoggedOpt) []*logopt.LoggedOpt {
	ops := "&"
	if derefs >= 0 {
		ops = strings.Repeat("*", derefs)
	}
	print := Debug['m'] >= 2

	flow := fmt.Sprintf("   flow: %s = %s%v:", e.explainLoc(dst), ops, e.explainLoc(srcloc))
	if print {
		fmt.Printf("%s:%s\n", pos, flow)
	}
	if logopt.Enabled() {
		var epos src.XPos
		if notes != nil {
			epos = notes.where.Pos
		} else if srcloc != nil && srcloc.n != nil {
			epos = srcloc.n.Pos
		}
		explanation = append(explanation, logopt.NewLoggedOpt(epos, "escflow", "escape", e.curfn.funcname(), flow))
	}

	for note := notes; note != nil; note = note.next {
		if print {
			fmt.Printf("%s:     from %v (%v) at %s\n", pos, note.where, note.why, linestr(note.where.Pos))
		}
		if logopt.Enabled() {
			explanation = append(explanation, logopt.NewLoggedOpt(note.where.Pos, "escflow", "escape", e.curfn.funcname(),
				fmt.Sprintf("  from %v (%v)", note.where, note.why)))
		}
	}
	return explanation
}
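These explanations surface under -gcflags='-m=2' and, with this change, in the -json optimizer log as well. A hypothetical sketch of roughly what explainFlow prints for a leaking local (exact positions and note text depend on the program):

package sketch

// With `go build -gcflags='-m=2'`, the output has roughly this shape:
//
//	sketch.go:9:9: &v escapes to heap:
//	sketch.go:9:9:   flow: ~r0 = &v:
//	sketch.go:9:9:     from return &v (return) at sketch.go:9:2
func leak() *int {
	v := 0
	return &v
}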
func (e *Escape) explainLoc(l *EscLocation) string {

View file

@@ -586,7 +586,7 @@ s%~ %%g
*/
func symfmt(b *bytes.Buffer, s *types.Sym, flag FmtFlag, mode fmtMode) {
	if flag&FmtShort == 0 {
		switch mode {
		case FErr: // This is for the user
			if s.Pkg == builtinpkg || s.Pkg == localpkg {
@@ -1306,9 +1306,6 @@ func (n *Node) exprfmt(s fmt.State, prec int, mode fmtMode) {
	case OPAREN:
		mode.Fprintf(s, "(%v)", n.Left)

	case OLITERAL: // this is a bit of a mess
		if mode == FErr {
			if n.Orig != nil && n.Orig != n {
@@ -1404,6 +1401,10 @@ func (n *Node) exprfmt(s fmt.State, prec int, mode fmtMode) {
	case OCOMPLIT:
		if mode == FErr {
			if n.Implicit() {
				mode.Fprintf(s, "... argument")
				return
			}
			if n.Right != nil {
				mode.Fprintf(s, "%v literal", n.Right)
				return

View file

@@ -45,6 +45,14 @@ func isRuntimePkg(p *types.Pkg) bool {
	return p.Path == "runtime"
}
// isReflectPkg reports whether p is package reflect.
func isReflectPkg(p *types.Pkg) bool {
if p == localpkg {
return myimportpath == "reflect"
}
return p.Path == "reflect"
}
// The Class of a variable/function describes the "storage class"
// of a variable or function. During parsing, storage classes are
// called declaration contexts.
@@ -64,32 +72,30 @@ const (
	_ = uint((1 << 3) - iota) // static assert for iota <= (1 << 3)
)
// Slices in the runtime are represented by three components:
//
// type slice struct {
// 	ptr unsafe.Pointer
// 	len int
// 	cap int
// }
//
// Strings in the runtime are represented by two components:
//
// type string struct {
// 	ptr unsafe.Pointer
// 	len int
// }
//
// These variables are the offsets of fields and sizes of these structs.
var (
	slicePtrOffset int64
	sliceLenOffset int64
	sliceCapOffset int64
	sizeofSlice    int64
	sizeofString   int64
)
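These layouts can be observed from ordinary Go code by measuring the header sizes; a minimal sketch (the printed values assume a 64-bit platform):

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	fmt.Println(unsafe.Sizeof([]byte(nil))) // 24: {ptr, len, cap}, three 8-byte words
	fmt.Println(unsafe.Sizeof(""))          // 16: {ptr, len}, two 8-byte words
}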
var pragcgobuf [][]string

View file

@@ -70,9 +70,13 @@ func newProgs(fn *Node, worker int) *Progs {
	pp.pos = fn.Pos
	pp.settext(fn)
	// PCDATA tables implicitly start with index -1.
	pp.prevLive = LivenessIndex{-1, -1, false}
	if go115ReduceLiveness {
		pp.nextLive = pp.prevLive
	} else {
		pp.nextLive = LivenessInvalid
	}
	return pp
}

@@ -109,7 +113,7 @@ func (pp *Progs) Free() {
// Prog adds a Prog with instruction As to pp.
func (pp *Progs) Prog(as obj.As) *obj.Prog {
	if pp.nextLive.StackMapValid() && pp.nextLive.stackMapIndex != pp.prevLive.stackMapIndex {
		// Emit stack map index change.
		idx := pp.nextLive.stackMapIndex
		pp.prevLive.stackMapIndex = idx
@@ -117,13 +121,32 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
		Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
		Addrconst(&p.To, int64(idx))
	}
	if !go115ReduceLiveness {
		if pp.nextLive.isUnsafePoint {
			// Unsafe points are encoded as a special value in the
			// register map.
			pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
		}
		if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
			// Emit register map index change.
			idx := pp.nextLive.regMapIndex
			pp.prevLive.regMapIndex = idx

			p := pp.Prog(obj.APCDATA)
			Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
			Addrconst(&p.To, int64(idx))
		}
	} else {
		if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
			// Emit unsafe-point marker.
			pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
			p := pp.Prog(obj.APCDATA)
			Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
			if pp.nextLive.isUnsafePoint {
				Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
			} else {
				Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
			}
		}
	}

	p := pp.next

View file

@@ -35,6 +35,8 @@
//         }
//     }
//
//     Fingerprint [8]byte
//
// uvarint means a uint64 written out using uvarint encoding.
//
// []T means a uvarint followed by that many T objects. In other
@@ -296,6 +298,10 @@ func iexport(out *bufio.Writer) {
	io.Copy(out, &hdr)
	io.Copy(out, &p.strings)
	io.Copy(out, &p.data0)

	// Add fingerprint (used by linker object file).
	// Attach this to the end, so tools (e.g. gcimporter) don't care.
	out.Write(Ctxt.Fingerprint[:])
}

// writeIndex writes out an object index. mainIndex indicates whether
@@ -991,7 +997,7 @@ func (w *exportWriter) linkname(s *types.Sym) {
}

func (w *exportWriter) symIdx(s *types.Sym) {
	if Ctxt.Flag_go115newobj {
		lsym := s.Linksym()
		if lsym.PkgIdx > goobj2.PkgIdxSelf || (lsym.PkgIdx == goobj2.PkgIdxInvalid && !lsym.Indexed()) || s.Linkname != "" {
			// Don't export index for non-package symbols, linkname'd symbols,

View file

@@ -10,6 +10,7 @@ package gc
import (
	"cmd/compile/internal/types"
	"cmd/internal/bio"
	"cmd/internal/goobj2"
	"cmd/internal/obj"
	"cmd/internal/src"
	"encoding/binary"
@@ -95,7 +96,7 @@ func (r *intReader) uint64() uint64 {
	return i
}

func iimport(pkg *types.Pkg, in *bio.Reader) (fingerprint goobj2.FingerprintType) {
	ir := &intReader{in, pkg}

	version := ir.uint64()
@@ -188,6 +189,14 @@ func iimport(pkg *types.Pkg, in *bio.Reader) {
			inlineImporter[s] = iimporterAndOffset{p, off}
		}
	}
// Fingerprint
n, err := in.Read(fingerprint[:])
if err != nil || n != len(fingerprint) {
yyerror("import %s: error reading fingerprint", pkg.Path)
errorexit()
}
return fingerprint
} }
type iimporter struct {
@@ -687,7 +696,7 @@ func (r *importReader) linkname(s *types.Sym) {
}

func (r *importReader) symIdx(s *types.Sym) {
	if Ctxt.Flag_go115newobj {
		lsym := s.Linksym()
		idx := int32(r.int64())
		if idx != -1 {
@@ -790,9 +799,6 @@ func (r *importReader) node() *Node {
	// case OPAREN:
	// 	unreachable - unpacked by exporter

	case OLITERAL:
		pos := r.pos()
		typ, val := r.value()

View file

@@ -496,7 +496,14 @@ func inlcalls(fn *Node) {
	if countNodes(fn) >= inlineBigFunctionNodes {
		maxCost = inlineBigFunctionMaxCost
	}
	// Map to keep track of functions that have been inlined at a particular
	// call site, in order to stop inlining when we reach the beginning of a
	// recursion cycle again. We don't inline immediately recursive functions,
	// but allow inlining if there is a recursion cycle of many functions.
	// Most likely, the inlining will stop before we even hit the beginning of
	// the cycle again, but the map catches the unusual case.
	inlMap := make(map[*Node]bool)
	fn = inlnode(fn, maxCost, inlMap)
	if fn != Curfn {
		Fatalf("inlnode replaced curfn")
	}
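A hypothetical cycle the map is meant to catch: neither function below is directly recursive, so both may be inlinable, and inlMap stops the expansion once the cycle wraps around to a function already inlined at the current call site:

package sketch

func even(n uint) bool {
	if n == 0 {
		return true
	}
	return odd(n - 1) // cycle even -> odd -> even spans two functions
}

func odd(n uint) bool {
	if n == 0 {
		return false
	}
	return even(n - 1)
}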
@@ -537,10 +544,10 @@ func inlconv2list(n *Node) []*Node {
	return s
}

func inlnodelist(l Nodes, maxCost int32, inlMap map[*Node]bool) {
	s := l.Slice()
	for i := range s {
		s[i] = inlnode(s[i], maxCost, inlMap)
	}
}

@@ -557,7 +564,7 @@ func inlnodelist(l Nodes, maxCost int32) {
// shorter and less complicated.
// The result of inlnode MUST be assigned back to n, e.g.
// 	n.Left = inlnode(n.Left)
func inlnode(n *Node, maxCost int32, inlMap map[*Node]bool) *Node {
	if n == nil {
		return n
	}

@@ -578,26 +585,26 @@ func inlnode(n *Node, maxCost int32) *Node {
	case OCALLMETH:
		// Prevent inlining some reflect.Value methods when using checkptr,
		// even when package reflect was compiled without it (#35073).
		if s := n.Left.Sym; Debug_checkptr != 0 && isReflectPkg(s.Pkg) && (s.Name == "Value.UnsafeAddr" || s.Name == "Value.Pointer") {
			return n
		}
	}

	lno := setlineno(n)

	inlnodelist(n.Ninit, maxCost, inlMap)
	for _, n1 := range n.Ninit.Slice() {
		if n1.Op == OINLCALL {
			inlconv2stmt(n1)
		}
	}

	n.Left = inlnode(n.Left, maxCost, inlMap)
	if n.Left != nil && n.Left.Op == OINLCALL {
		n.Left = inlconv2expr(n.Left)
	}

	n.Right = inlnode(n.Right, maxCost, inlMap)
	if n.Right != nil && n.Right.Op == OINLCALL {
		if n.Op == OFOR || n.Op == OFORUNTIL {
			inlconv2stmt(n.Right)
@@ -612,7 +619,7 @@ func inlnode(n *Node, maxCost int32) *Node {
		}
	}

	inlnodelist(n.List, maxCost, inlMap)
	if n.Op == OBLOCK {
		for _, n2 := range n.List.Slice() {
			if n2.Op == OINLCALL {
@@ -628,7 +635,7 @@ func inlnode(n *Node, maxCost int32) *Node {
		}
	}

	inlnodelist(n.Rlist, maxCost, inlMap)
	s := n.Rlist.Slice()
	for i1, n1 := range s {
		if n1.Op == OINLCALL {
@@ -640,7 +647,7 @@ func inlnode(n *Node, maxCost int32) *Node {
		}
	}

	inlnodelist(n.Nbody, maxCost, inlMap)
	for _, n := range n.Nbody.Slice() {
		if n.Op == OINLCALL {
			inlconv2stmt(n)
@@ -663,12 +670,12 @@ func inlnode(n *Node, maxCost int32) *Node {
			fmt.Printf("%v:call to func %+v\n", n.Line(), n.Left)
		}
		if n.Left.Func != nil && n.Left.Func.Inl != nil && !isIntrinsicCall(n) { // normal case
			n = mkinlcall(n, n.Left, maxCost, inlMap)
		} else if n.Left.isMethodExpression() && asNode(n.Left.Sym.Def) != nil {
			n = mkinlcall(n, asNode(n.Left.Sym.Def), maxCost, inlMap)
		} else if n.Left.Op == OCLOSURE {
			if f := inlinableClosure(n.Left); f != nil {
				n = mkinlcall(n, f, maxCost, inlMap)
			}
		} else if n.Left.Op == ONAME && n.Left.Name != nil && n.Left.Name.Defn != nil {
			if d := n.Left.Name.Defn; d.Op == OAS && d.Right.Op == OCLOSURE {
@@ -680,6 +687,10 @@ func inlnode(n *Node, maxCost int32) *Node {
					if Debug['m'] > 1 {
						fmt.Printf("%v: cannot inline escaping closure variable %v\n", n.Line(), n.Left)
					}
					if logopt.Enabled() {
						logopt.LogOpt(n.Pos, "cannotInlineCall", "inline", Curfn.funcname(),
							fmt.Sprintf("%v cannot be inlined (escaping closure variable)", n.Left))
					}
					break
				}

@@ -688,13 +699,21 @@ func inlnode(n *Node, maxCost int32) *Node {
				if Debug['m'] > 1 {
					if a != nil {
						fmt.Printf("%v: cannot inline re-assigned closure variable at %v: %v\n", n.Line(), a.Line(), a)
						if logopt.Enabled() {
							logopt.LogOpt(n.Pos, "cannotInlineCall", "inline", Curfn.funcname(),
								fmt.Sprintf("%v cannot be inlined (re-assigned closure variable)", a))
						}
					} else {
						fmt.Printf("%v: cannot inline global closure variable %v\n", n.Line(), n.Left)
						if logopt.Enabled() {
							logopt.LogOpt(n.Pos, "cannotInlineCall", "inline", Curfn.funcname(),
								fmt.Sprintf("%v cannot be inlined (global closure variable)", n.Left))
						}
					}
				}
				break
			}
			n = mkinlcall(n, f, maxCost, inlMap)
		}
	}
}

@@ -713,7 +732,7 @@ func inlnode(n *Node, maxCost int32) *Node {
		Fatalf("no function definition for [%p] %+v\n", n.Left.Type, n.Left.Type)
	}

	n = mkinlcall(n, asNode(n.Left.Type.FuncType().Nname), maxCost, inlMap)
}

lineno = lno
@@ -833,9 +852,12 @@ var inlgen int
// parameters.
// The result of mkinlcall MUST be assigned back to n, e.g.
// 	n.Left = mkinlcall(n.Left, fn, isddd)
func mkinlcall(n, fn *Node, maxCost int32, inlMap map[*Node]bool) *Node {
	if fn.Func.Inl == nil {
		if logopt.Enabled() {
			logopt.LogOpt(n.Pos, "cannotInlineCall", "inline", Curfn.funcname(),
				fmt.Sprintf("%s cannot be inlined", fn.pkgFuncName()))
		}
		return n
	}
	if fn.Func.Inl.Cost > maxCost {
@@ -866,6 +888,16 @@ func mkinlcall(n, fn *Node, maxCost int32) *Node {
		return n
	}

	if inlMap[fn] {
		if Debug['m'] > 1 {
			fmt.Printf("%v: cannot inline %v into %v: repeated recursive cycle\n", n.Line(), fn, Curfn.funcname())
		}
		return n
	}
	inlMap[fn] = true
	defer func() {
		inlMap[fn] = false
	}()
	if Debug_typecheckinl == 0 {
		typecheckinl(fn)
	}
@@ -879,9 +911,6 @@ func mkinlcall(n, fn *Node, maxCost int32) *Node {
	if Debug['m'] > 2 {
		fmt.Printf("%v: Before inlining: %+v\n", n.Line(), n)
	}

	if ssaDump != "" && ssaDump == Curfn.funcname() {
		ssaDumpInlined = append(ssaDumpInlined, fn)
@@ -1129,7 +1158,7 @@ func mkinlcall(n, fn *Node, maxCost int32) *Node {
	// instead we emit the things that the body needs
	// and each use must redo the inlining.
	// luckily these are small.
	inlnodelist(call.Nbody, maxCost, inlMap)
	for _, n := range call.Nbody.Slice() {
		if n.Op == OINLCALL {
			inlconv2stmt(n)
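The inlMap set/unset pattern in mkinlcall is a general recursion guard; a standalone sketch of the same shape, with a hypothetical node type:

package sketch

type node struct{ children []*node }

func walk(n *node, active map[*node]bool) {
	if active[n] {
		return // n is already on the current path; cut the cycle here
	}
	active[n] = true
	defer func() { active[n] = false }()

	for _, c := range n.children {
		walk(c, active)
	}
}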

View file

@@ -155,11 +155,11 @@ func TestIntendedInlining(t *testing.T) {
		},
	}

	if runtime.GOARCH != "386" && runtime.GOARCH != "mips64" && runtime.GOARCH != "mips64le" && runtime.GOARCH != "riscv64" {
		// nextFreeFast calls sys.Ctz64, which on 386 is implemented in asm and is not inlinable.
		// We currently don't have midstack inlining so nextFreeFast is also not inlinable on 386.
		// On mips64x and riscv64, Ctz64 is not intrinsified and causes nextFreeFast too expensive
		// to inline (Issue 22239).
		want["runtime"] = append(want["runtime"], "nextFreeFast")
	}
	if runtime.GOARCH != "386" {
@@ -175,7 +175,7 @@ func TestIntendedInlining(t *testing.T) {
	}

	switch runtime.GOARCH {
	case "386", "wasm", "arm":
	default:
		// TODO(mvdan): As explained in /test/inline_sync.go, some
		// architectures don't have atomic intrinsics, so these go over

View file

@@ -28,16 +28,18 @@ func isQuoted(s string) bool {
	return len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"'
}

type PragmaFlag int16

const (
	// Func pragmas.
	Nointerface    PragmaFlag = 1 << iota
	Noescape       // func parameters don't escape
	Norace         // func must not have race detector annotations
	Nosplit        // func should not execute on separate stack
	Noinline       // func should not be inlined
	NoCheckPtr     // func should not be instrumented by checkptr
	CgoUnsafeArgs  // treat a pointer to one arg as a pointer to them all
	UintptrEscapes // pointers converted to uintptr escape

	// Runtime-only func pragmas.
	// See ../../../../runtime/README.md for detailed descriptions.
@@ -50,7 +52,24 @@ const (
	NotInHeap // values of this type must not be heap allocated
)

const (
	FuncPragmas = Nointerface |
		Noescape |
		Norace |
		Nosplit |
		Noinline |
		NoCheckPtr |
		CgoUnsafeArgs |
		UintptrEscapes |
		Systemstack |
		Nowritebarrier |
		Nowritebarrierrec |
		Yeswritebarrierrec

	TypePragmas = NotInHeap
)

func pragmaFlag(verb string) PragmaFlag {
	switch verb {
	case "go:nointerface":
		if objabi.Fieldtrack_enabled != 0 {

View file

@@ -14,6 +14,7 @@ import (
	"cmd/compile/internal/types"
	"cmd/internal/bio"
	"cmd/internal/dwarf"
	"cmd/internal/goobj2"
	"cmd/internal/obj"
	"cmd/internal/objabi"
	"cmd/internal/src"
@@ -280,7 +281,7 @@ func Main(archInit func(*Arch)) {
	flag.StringVar(&benchfile, "bench", "", "append benchmark times to `file`")
	flag.BoolVar(&smallFrames, "smallframes", false, "reduce the size limit for stack allocated objects")
	flag.BoolVar(&Ctxt.UseBASEntries, "dwarfbasentries", Ctxt.UseBASEntries, "use base address selection entries in DWARF")
	flag.BoolVar(&Ctxt.Flag_go115newobj, "go115newobj", true, "use new object file format")
	flag.StringVar(&jsonLogOpt, "json", "", "version,destination for JSON compiler/optimizer logging")

	objabi.Flagparse(usage)
@@ -314,7 +315,7 @@ func Main(archInit func(*Arch)) {
	// Record flags that affect the build result. (And don't
	// record flags that don't, since that would cause spurious
	// changes in the binary.)
	recordFlags("B", "N", "l", "msan", "race", "shared", "dynlink", "dwarflocationlists", "dwarfbasentries", "smallframes", "spectre", "go115newobj")

	if smallFrames {
		maxStackVarSize = 128 * 1024
@@ -379,9 +380,8 @@ func Main(archInit func(*Arch)) {
	if flag_race && flag_msan {
		log.Fatal("cannot use both -race and -msan")
	}
	if flag_race || flag_msan {
		// -race and -msan imply -d=checkptr for now.
		Debug_checkptr = 1
	}
	if ispkgin(omit_pkgs) {
@@ -679,8 +679,12 @@ func Main(archInit func(*Arch)) {
	if Debug['l'] != 0 {
		// Find functions that can be inlined and clone them before walk expands them.
		visitBottomUp(xtop, func(list []*Node, recursive bool) {
			numfns := numNonClosures(list)
			for _, n := range list {
				if !recursive || numfns > 1 {
					// We allow inlining if there is no
					// recursion, or the recursion cycle is
					// across more than one function.
					caninl(n)
				} else {
					if Debug['m'] > 1 {
@@ -824,6 +828,17 @@ func Main(archInit func(*Arch)) {
		}
	}
// numNonClosures returns the number of functions in list which are not closures.
func numNonClosures(list []*Node) int {
count := 0
for _, n := range list {
if n.Func.Closure == nil {
count++
}
}
return count
}
func writebench(filename string) error {
	f, err := os.OpenFile(filename, os.O_WRONLY|os.O_CREATE|os.O_APPEND, 0666)
	if err != nil {
@@ -1240,15 +1255,6 @@ func importfile(f *Val) *types.Pkg {
		}
	}

	// In the importfile, if we find:
	// $$\n (textual format): not supported anymore
	// $$B\n (binary format) : import directly, then feed the lexer a dummy statement
@@ -1273,6 +1279,7 @@ func importfile(f *Val) *types.Pkg {
		c, _ = imp.ReadByte()
	}

	var fingerprint goobj2.FingerprintType
	switch c {
	case '\n':
		yyerror("cannot import %s: old export format no longer supported (recompile library)", path_)
@@ -1296,13 +1303,22 @@ func importfile(f *Val) *types.Pkg {
			yyerror("import %s: unexpected package format byte: %v", file, c)
			errorexit()
		}
		fingerprint = iimport(importpkg, imp)

	default:
		yyerror("no import in %q", path_)
		errorexit()
	}
// assume files move (get installed) so don't record the full path
if packageFile != nil {
// If using a packageFile map, assume path_ can be recorded directly.
Ctxt.AddImport(path_, fingerprint)
} else {
// For file "/Users/foo/go/pkg/darwin_amd64/math.a" record "math.a".
Ctxt.AddImport(file[len(file)-len(path_)-len(".a"):], fingerprint)
}
	if importpkg.Height >= myheight {
		myheight = importpkg.Height + 1
	}

View file

@@ -241,6 +241,10 @@ func (p *noder) node() {
	p.setlineno(p.file.PkgName)
	mkpackage(p.file.PkgName.Value)
if pragma, ok := p.file.Pragma.(*Pragma); ok {
p.checkUnused(pragma)
}
	xtop = append(xtop, p.decls(p.file.DeclList)...)

	for _, n := range p.linknames {
@@ -313,6 +317,10 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) {
		return // avoid follow-on errors if there was a syntax error
	}
if pragma, ok := imp.Pragma.(*Pragma); ok {
p.checkUnused(pragma)
}
	val := p.basicLit(imp.Path)
	ipkg := importfile(&val)
@@ -363,6 +371,10 @@ func (p *noder) varDecl(decl *syntax.VarDecl) []*Node {
		exprs = p.exprList(decl.Values)
	}
if pragma, ok := decl.Pragma.(*Pragma); ok {
p.checkUnused(pragma)
}
	p.setlineno(decl)
	return variter(names, typ, exprs)
}

@@ -384,6 +396,10 @@ func (p *noder) constDecl(decl *syntax.ConstDecl, cs *constState) []*Node {
		}
	}
if pragma, ok := decl.Pragma.(*Pragma); ok {
p.checkUnused(pragma)
}
	names := p.declNames(decl.NameList)
	typ := p.typeExprOrNil(decl.Type)
@@ -438,11 +454,13 @@ func (p *noder) typeDecl(decl *syntax.TypeDecl) *Node {
	param := n.Name.Param
	param.Ntype = typ
	param.Alias = decl.Alias
	if pragma, ok := decl.Pragma.(*Pragma); ok {
		if !decl.Alias {
			param.Pragma = pragma.Flag & TypePragmas
			pragma.Flag &^= TypePragmas
		}
		p.checkUnused(pragma)
	}

	nod := p.nod(decl, ODCLTYPE, n, nil)
@@ -493,10 +511,13 @@ func (p *noder) funcDecl(fun *syntax.FuncDecl) *Node {
	f.Func.Nname.Name.Defn = f
	f.Func.Nname.Name.Param.Ntype = t

	if pragma, ok := fun.Pragma.(*Pragma); ok {
		f.Func.Pragma = pragma.Flag & FuncPragmas
		if pragma.Flag&Systemstack != 0 && pragma.Flag&Nosplit != 0 {
			yyerrorl(f.Pos, "go:nosplit and go:systemstack cannot be combined")
		}
		pragma.Flag &^= FuncPragmas
		p.checkUnused(pragma)
	}

	if fun.Recv == nil {
@@ -1479,13 +1500,58 @@ var allowedStdPragmas = map[string]bool{
	"go:generate": true,
}
// *Pragma is the value stored in a syntax.Pragma during parsing.
type Pragma struct {
Flag PragmaFlag // collected bits
Pos []PragmaPos // position of each individual flag
}
type PragmaPos struct {
Flag PragmaFlag
Pos syntax.Pos
}
func (p *noder) checkUnused(pragma *Pragma) {
for _, pos := range pragma.Pos {
if pos.Flag&pragma.Flag != 0 {
p.yyerrorpos(pos.Pos, "misplaced compiler directive")
}
}
}
func (p *noder) checkUnusedDuringParse(pragma *Pragma) {
for _, pos := range pragma.Pos {
if pos.Flag&pragma.Flag != 0 {
p.error(syntax.Error{Pos: pos.Pos, Msg: "misplaced compiler directive"})
}
}
}
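checkUnused fires when a directive's bit is still set after the declaration consumed what it could; for example, a function pragma attached to a variable declaration would be reported (hypothetical source):

package sketch

//go:noinline
var x int // error: misplaced compiler directive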
// pragma is called concurrently if files are parsed concurrently.
func (p *noder) pragma(pos syntax.Pos, blankLine bool, text string, old syntax.Pragma) syntax.Pragma {
	pragma, _ := old.(*Pragma)
	if pragma == nil {
		pragma = new(Pragma)
	}

	if text == "" {
		// unused pragma; only called with old != nil.
		p.checkUnusedDuringParse(pragma)
		return nil
	}

	if strings.HasPrefix(text, "line ") {
		// line directives are handled by syntax package
		panic("unreachable")
	}

	if !blankLine {
		// directive must be on line by itself
		p.error(syntax.Error{Pos: pos, Msg: "misplaced compiler directive"})
		return pragma
	}

	switch {
	case strings.HasPrefix(text, "go:linkname "):
		f := strings.Fields(text)
		if !(2 <= len(f) && len(f) <= 3) {
@@ -1513,7 +1579,8 @@ func (p *noder) pragma(pos syntax.Pos, text string) syntax.Pragma {
				p.error(syntax.Error{Pos: pos, Msg: fmt.Sprintf("invalid library name %q in cgo_import_dynamic directive", lib)})
			}
			p.pragcgo(pos, text)
			pragma.Flag |= pragmaFlag("go:cgo_import_dynamic")
			break
		}
		fallthrough
	case strings.HasPrefix(text, "go:cgo_"):
@@ -1530,18 +1597,19 @@ func (p *noder) pragma(pos syntax.Pos, text string) syntax.Pragma {
		if i := strings.Index(text, " "); i >= 0 {
			verb = verb[:i]
		}
		flag := pragmaFlag(verb)
		const runtimePragmas = Systemstack | Nowritebarrier | Nowritebarrierrec | Yeswritebarrierrec
		if !compiling_runtime && flag&runtimePragmas != 0 {
			p.error(syntax.Error{Pos: pos, Msg: fmt.Sprintf("//%s only allowed in runtime", verb)})
		}
		if flag == 0 && !allowedStdPragmas[verb] && compiling_std {
			p.error(syntax.Error{Pos: pos, Msg: fmt.Sprintf("//%s is not allowed in the standard library", verb)})
		}
		pragma.Flag |= flag
		pragma.Pos = append(pragma.Pos, PragmaPos{flag, pos})
	}

	return pragma
}

// isCgoGeneratedFile reports whether pos is in a file
View file

@@ -365,11 +365,12 @@ func stringsym(pos src.XPos, s string) (data *obj.LSym) {
var slicebytes_gen int

func slicebytes(nam *Node, s string) {
	slicebytes_gen++
	symname := fmt.Sprintf(".gobytes.%d", slicebytes_gen)
	sym := localpkg.Lookup(symname)
	symnode := newname(sym)
	sym.Def = asTypesNode(symnode)

	lsym := sym.Linksym()
	off := dsname(lsym, 0, s, nam.Pos, "slice")
@@ -378,11 +379,7 @@ func slicebytes(nam *Node, s string) {
	if nam.Op != ONAME {
		Fatalf("slicebytes %v", nam)
	}
	slicesym(nam, symnode, int64(len(s)))
}

func dsname(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
@@ -417,69 +414,99 @@ func dsymptrWeakOff(s *obj.LSym, off int, x *obj.LSym) int {
	return off
}
// slicesym writes a static slice symbol {&arr, lencap, lencap} to n.
// arr must be an ONAME. slicesym does not modify n.
func slicesym(n, arr *Node, lencap int64) {
	s := n.Sym.Linksym()
	base := n.Xoffset
	if arr.Op != ONAME {
		Fatalf("slicesym non-name arr %v", arr)
	}
	s.WriteAddr(Ctxt, base, Widthptr, arr.Sym.Linksym(), arr.Xoffset)
	s.WriteInt(Ctxt, base+sliceLenOffset, Widthptr, lencap)
	s.WriteInt(Ctxt, base+sliceCapOffset, Widthptr, lencap)
}

// addrsym writes the static address of a to n. a must be an ONAME.
// Neither n nor a is modified.
func addrsym(n, a *Node) {
	if n.Op != ONAME {
		Fatalf("addrsym n op %v", n.Op)
	}
	if n.Sym == nil {
		Fatalf("addrsym nil n sym")
	}
	if a.Op != ONAME {
		Fatalf("addrsym a op %v", a.Op)
	}
	s := n.Sym.Linksym()
	s.WriteAddr(Ctxt, n.Xoffset, Widthptr, a.Sym.Linksym(), a.Xoffset)
}

// pfuncsym writes the static address of f to n. f must be a global function.
// Neither n nor f is modified.
func pfuncsym(n, f *Node) {
	if n.Op != ONAME {
		Fatalf("pfuncsym n op %v", n.Op)
	}
	if n.Sym == nil {
		Fatalf("pfuncsym nil n sym")
	}
	if f.Class() != PFUNC {
		Fatalf("pfuncsym class not PFUNC %d", f.Class())
	}
	s := n.Sym.Linksym()
	s.WriteAddr(Ctxt, n.Xoffset, Widthptr, funcsym(f.Sym).Linksym(), f.Xoffset)
}

// litsym writes the static literal c to n.
// Neither n nor c is modified.
func litsym(n, c *Node, wid int) {
	if n.Op != ONAME {
		Fatalf("litsym n op %v", n.Op)
	}
	if c.Op != OLITERAL {
		Fatalf("litsym c op %v", c.Op)
	}
	if n.Sym == nil {
		Fatalf("litsym nil n sym")
	}
	s := n.Sym.Linksym()
	switch u := c.Val().U.(type) {
	case bool:
		i := int64(obj.Bool2int(u))
		s.WriteInt(Ctxt, n.Xoffset, wid, i)

	case *Mpint:
		s.WriteInt(Ctxt, n.Xoffset, wid, u.Int64())

	case *Mpflt:
		f := u.Float64()
		switch n.Type.Etype {
		case TFLOAT32:
			s.WriteFloat32(Ctxt, n.Xoffset, float32(f))
		case TFLOAT64:
			s.WriteFloat64(Ctxt, n.Xoffset, f)
		}

	case *Mpcplx:
		r := u.Real.Float64()
		i := u.Imag.Float64()
		switch n.Type.Etype {
		case TCOMPLEX64:
			s.WriteFloat32(Ctxt, n.Xoffset, float32(r))
			s.WriteFloat32(Ctxt, n.Xoffset+4, float32(i))
		case TCOMPLEX128:
			s.WriteFloat64(Ctxt, n.Xoffset, r)
			s.WriteFloat64(Ctxt, n.Xoffset+8, i)
		}

	case string:
		symdata := stringsym(n.Pos, u)
		s.WriteAddr(Ctxt, n.Xoffset, Widthptr, symdata, 0)
		s.WriteInt(Ctxt, n.Xoffset+int64(Widthptr), Widthptr, int64(len(u)))

	default:
		Fatalf("litsym unhandled OLITERAL %v", c)
	}
}
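slicebytes and slicesym together back statically initialized data such as the following package-level variable: the bytes land in a generated .gobytes symbol and the variable's own symbol gets a {ptr, len, cap} header written by slicesym (illustrative):

package sketch

// Initialized at link time rather than by runtime code: the compiler
// emits ".gobytes.N" for the string data and a three-word slice header for b.
var b = []byte("hello, world")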

View file

@@ -1,4 +1,4 @@
// Code generated by "stringer -type=Op -trimprefix=O"; DO NOT EDIT.

package gc

@@ -144,28 +144,27 @@ func _() {
	_ = x[OTFUNC-133]
	_ = x[OTARRAY-134]
	_ = x[ODDD-135]
	_ = x[OINLCALL-136]
	_ = x[OEFACE-137]
	_ = x[OITAB-138]
	_ = x[OIDATA-139]
	_ = x[OSPTR-140]
	_ = x[OCLOSUREVAR-141]
	_ = x[OCFUNC-142]
	_ = x[OCHECKNIL-143]
	_ = x[OVARDEF-144]
	_ = x[OVARKILL-145]
	_ = x[OVARLIVE-146]
	_ = x[ORESULT-147]
	_ = x[OINLMARK-148]
	_ = x[ORETJMP-149]
	_ = x[OGETG-150]
	_ = x[OEND-151]
}

const _Op_name = "XXXNAMENONAMETYPEPACKLITERALADDSUBORXORADDSTRADDRANDANDAPPENDBYTES2STRBYTES2STRTMPRUNES2STRSTR2BYTESSTR2BYTESTMPSTR2RUNESASAS2AS2DOTTYPEAS2FUNCAS2MAPRAS2RECVASOPCALLCALLFUNCCALLMETHCALLINTERCALLPARTCAPCLOSECLOSURECOMPLITMAPLITSTRUCTLITARRAYLITSLICELITPTRLITCONVCONVIFACECONVNOPCOPYDCLDCLFUNCDCLFIELDDCLCONSTDCLTYPEDELETEDOTDOTPTRDOTMETHDOTINTERXDOTDOTTYPEDOTTYPE2EQNELTLEGEGTDEREFINDEXINDEXMAPKEYSTRUCTKEYLENMAKEMAKECHANMAKEMAPMAKESLICEMULDIVMODLSHRSHANDANDNOTNEWNEWOBJNOTBITNOTPLUSNEGORORPANICPRINTPRINTNPARENSENDSLICESLICEARRSLICESTRSLICE3SLICE3ARRSLICEHEADERRECOVERRECVRUNESTRSELRECVSELRECV2IOTAREALIMAGCOMPLEXALIGNOFOFFSETOFSIZEOFBLOCKBREAKCASECONTINUEDEFEREMPTYFALLFORFORUNTILGOTOIFLABELGORANGERETURNSELECTSWITCHTYPESWTCHANTMAPTSTRUCTTINTERTFUNCTARRAYDDDINLCALLEFACEITABIDATASPTRCLOSUREVARCFUNCCHECKNILVARDEFVARKILLVARLIVERESULTINLMARKRETJMPGETGEND"

var _Op_index = [...]uint16{0, 3, 7, 13, 17, 21, 28, 31, 34, 36, 39, 45, 49, 55, 61, 70, 82, 91, 100, 112, 121, 123, 126, 136, 143, 150, 157, 161, 165, 173, 181, 190, 198, 201, 206, 213, 220, 226, 235, 243, 251, 257, 261, 270, 277, 281, 284, 291, 299, 307, 314, 320, 323, 329, 336, 344, 348, 355, 363, 365, 367, 369, 371, 373, 375, 380, 385, 393, 396, 405, 408, 412, 420, 427, 436, 439, 442, 445, 448, 451, 454, 460, 463, 469, 472, 478, 482, 485, 489, 494, 499, 505, 510, 514, 519, 527, 535, 541, 550, 561, 568, 572, 579, 586, 594, 598, 602, 606, 613, 620, 628, 634, 639, 644, 648, 656, 661, 666, 670, 673, 681, 685, 687, 692, 694, 699, 705, 711, 717, 723, 728, 732, 739, 745, 750, 756, 759, 766, 771, 775, 780, 784, 794, 799, 807, 813, 820, 827, 833, 840, 846, 850, 853}

func (i Op) String() string {
	if i >= Op(len(_Op_index)-1) {

View file

@@ -407,41 +407,43 @@ func (o *Order) call(n *Node) {
 		// Caller should have already called o.init(n).
 		Fatalf("%v with unexpected ninit", n.Op)
 	}
-	n.Left = o.expr(n.Left, nil)
-	n.Right = o.expr(n.Right, nil) // ODDDARG temp
-	o.exprList(n.List)
-
-	if n.Op != OCALLFUNC && n.Op != OCALLMETH {
+	if n.Op != OCALLFUNC && n.Op != OCALLMETH && n.Op != OCALLINTER {
+		// Builtin functions.
+		n.Left = o.expr(n.Left, nil)
+		n.Right = o.expr(n.Right, nil)
+		o.exprList(n.List)
 		return
 	}
-	keepAlive := func(i int) {
+
+	fixVariadicCall(n)
+	n.Left = o.expr(n.Left, nil)
+	o.exprList(n.List)
+
+	if n.Op == OCALLINTER {
+		return
+	}
+	keepAlive := func(arg *Node) {
 		// If the argument is really a pointer being converted to uintptr,
 		// arrange for the pointer to be kept alive until the call returns,
 		// by copying it into a temp and marking that temp
 		// still alive when we pop the temp stack.
-		xp := n.List.Addr(i)
-		for (*xp).Op == OCONVNOP && !(*xp).Type.IsUnsafePtr() {
-			xp = &(*xp).Left
-		}
-		x := *xp
-		if x.Type.IsUnsafePtr() {
-			x = o.copyExpr(x, x.Type, false)
+		if arg.Op == OCONVNOP && arg.Left.Type.IsUnsafePtr() {
+			x := o.copyExpr(arg.Left, arg.Left.Type, false)
 			x.Name.SetKeepalive(true)
-			*xp = x
+			arg.Left = x
 		}
 	}

-	for i, t := range n.Left.Type.Params().FieldSlice() {
-		// Check for "unsafe-uintptr" tag provided by escape analysis.
-		if t.IsDDD() && !n.IsDDD() {
-			if t.Note == uintptrEscapesTag {
-				for ; i < n.List.Len(); i++ {
-					keepAlive(i)
-				}
-			}
-		} else {
-			if t.Note == unsafeUintptrTag || t.Note == uintptrEscapesTag {
-				keepAlive(i)
+	// Check for "unsafe-uintptr" tag provided by escape analysis.
+	for i, param := range n.Left.Type.Params().FieldSlice() {
+		if param.Note == unsafeUintptrTag || param.Note == uintptrEscapesTag {
+			if arg := n.List.Index(i); arg.Op == OSLICELIT {
+				for _, elt := range arg.List.Slice() {
+					keepAlive(elt)
+				}
+			} else {
+				keepAlive(arg)
 			}
 		}
 	}
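For context, a hedged sketch of the user-level pattern the keepAlive rewrite protects, assuming syscall.Syscall's uintptr parameters carry the unsafe-uintptr note from escape analysis (Linux-specific example):

    package main

    import (
    	"syscall"
    	"unsafe"
    )

    func main() {
    	msg := []byte("hi\n")
    	// uintptr(unsafe.Pointer(...)) as a call argument is the OCONVNOP
    	// shape keepAlive looks for: the pointer is copied into a temp
    	// marked SetKeepalive, so msg stays live until the call returns.
    	syscall.Syscall(syscall.SYS_WRITE, 1,
    		uintptr(unsafe.Pointer(&msg[0])), uintptr(len(msg)))
    }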
@@ -1214,15 +1216,6 @@ func (o *Order) expr(n, lhs *Node) *Node {
 			prealloc[n] = o.newTemp(t, false)
 		}

-	case ODDDARG:
-		if n.Transient() {
-			// The ddd argument does not live beyond the call it is created for.
-			// Allocate a temporary that will be cleaned up when this statement
-			// completes. We could be more aggressive and try to arrange for it
-			// to be cleaned up when the call completes.
-			prealloc[n] = o.newTemp(n.Type.Elem(), false)
-		}
-
 	case ODOTTYPE, ODOTTYPE2:
 		n.Left = o.expr(n.Left, nil)
 		if !isdirectiface(n.Type) || instrumenting {

View file

@@ -24,6 +24,16 @@ import (
 	"strings"
 )

+// go115ReduceLiveness disables register maps and only produces stack
+// maps at call sites.
+//
+// In Go 1.15, we changed debug call injection to use conservative
+// scanning instead of precise pointer maps, so these are no longer
+// necessary.
+//
+// Keep in sync with runtime/preempt.go:go115ReduceLiveness.
+const go115ReduceLiveness = true
+
 // OpVarDef is an annotation for the liveness analysis, marking a place
 // where a complete initialization (definition) of a variable begins.
 // Since the liveness analysis can see initialization of single-word
@@ -107,7 +117,11 @@ type Liveness struct {
 	be []BlockEffects

-	// unsafePoints bit i is set if Value ID i is not a safe point.
+	// allUnsafe indicates that all points in this function are
+	// unsafe-points.
+	allUnsafe bool
+	// unsafePoints bit i is set if Value ID i is an unsafe-point
+	// (preemption is not allowed). Only valid if !allUnsafe.
 	unsafePoints bvec

 	// An array with a bit vector for each safe point in the
@@ -143,52 +157,72 @@ type openDeferVarInfo struct {
 // LivenessMap maps from *ssa.Value to LivenessIndex.
 type LivenessMap struct {
-	m []LivenessIndex
+	vals map[ssa.ID]LivenessIndex
 }

-func (m *LivenessMap) reset(ids int) {
-	m2 := m.m
-	if ids > cap(m2) {
-		m2 = make([]LivenessIndex, ids)
+func (m *LivenessMap) reset() {
+	if m.vals == nil {
+		m.vals = make(map[ssa.ID]LivenessIndex)
 	} else {
-		m2 = m2[:ids]
-	}
-	none := LivenessInvalid
-	for i := range m2 {
-		m2[i] = none
+		for k := range m.vals {
+			delete(m.vals, k)
+		}
 	}
-	m.m = m2
 }

 func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
-	m.m[v.ID] = i
+	m.vals[v.ID] = i
 }

 func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
-	if int(v.ID) < len(m.m) {
-		return m.m[int(v.ID)]
+	if !go115ReduceLiveness {
+		// All safe-points are in the map, so if v isn't in
+		// the map, it's an unsafe-point.
+		if idx, ok := m.vals[v.ID]; ok {
+			return idx
+		}
+		return LivenessInvalid
 	}
-	// Not a safe point.
-	return LivenessInvalid
+
+	// If v isn't in the map, then it's a "don't care" and not an
+	// unsafe-point.
+	if idx, ok := m.vals[v.ID]; ok {
+		return idx
+	}
+	return LivenessIndex{StackMapDontCare, StackMapDontCare, false}
 }

-// LivenessIndex stores the liveness map index for a safe-point.
+// LivenessIndex stores the liveness map information for a Value.
 type LivenessIndex struct {
 	stackMapIndex int
-	regMapIndex   int
+	regMapIndex   int // only for !go115ReduceLiveness
+
+	// isUnsafePoint indicates that this is an unsafe-point.
+	//
+	// Note that it's possible for a call Value to have a stack
+	// map while also being an unsafe-point. This means it cannot
+	// be preempted at this instruction, but that a preemption or
+	// stack growth may happen in the called function.
+	isUnsafePoint bool
 }

-// LivenessInvalid indicates an unsafe point.
-//
-// We use index -2 because PCDATA tables conventionally start at -1,
-// so -1 is used to mean the entry liveness map (which is actually at
-// index 0; sigh). TODO(austin): Maybe we should use PCDATA+1 as the
-// index into the liveness map so -1 uniquely refers to the entry
-// liveness map.
-var LivenessInvalid = LivenessIndex{-2, -2}
+// LivenessInvalid indicates an unsafe point with no stack map.
+var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness

-func (idx LivenessIndex) Valid() bool {
-	return idx.stackMapIndex >= 0
+// StackMapDontCare indicates that the stack map index at a Value
+// doesn't matter.
+//
+// This is a sentinel value that should never be emitted to the PCDATA
+// stream. We use -1000 because that's obviously never a valid stack
+// index (but -1 is).
+const StackMapDontCare = -1000
+
+func (idx LivenessIndex) StackMapValid() bool {
+	return idx.stackMapIndex != StackMapDontCare
+}
+
+func (idx LivenessIndex) RegMapValid() bool {
+	return idx.regMapIndex != StackMapDontCare
 }

 type progeffectscache struct {
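A standalone sketch of why the sparse map fits the new scheme (toy types assumed, not compiler code): most Values now need no entry at all, and a missed lookup decodes as a cheap don't-care default rather than occupying one slot per Value ID.

    package main

    import "fmt"

    type valID int32

    type livenessIndex struct{ stackMapIndex int }

    const stackMapDontCare = -1000 // same sentinel idea as StackMapDontCare

    func main() {
    	// Sparse: only calls and unsafe-points get entries, unlike the
    	// old dense []LivenessIndex indexed by Value ID.
    	vals := map[valID]livenessIndex{7: {stackMapIndex: 2}}
    	idx, ok := vals[42]
    	if !ok {
    		idx = livenessIndex{stackMapDontCare}
    	}
    	fmt.Println(idx.stackMapIndex != stackMapDontCare) // false: no map needed
    }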
@@ -377,6 +411,9 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
 // regEffects returns the registers affected by v.
 func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
+	if go115ReduceLiveness {
+		return 0, 0
+	}
 	if v.Op == ssa.OpPhi {
 		// All phi node arguments must come from the same
 		// register and the result must also go to that
@@ -458,7 +495,7 @@ func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
 	return uevar, kill
 }

-type liveRegMask uint32
+type liveRegMask uint32 // only if !go115ReduceLiveness

 func (m liveRegMask) niceString(config *ssa.Config) string {
 	if m == 0 {
@@ -497,7 +534,7 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 	// Significant sources of allocation are kept in the ssa.Cache
 	// and reused. Surprisingly, the bit vectors themselves aren't
-	// a major source of allocation, but the slices are.
+	// a major source of allocation, but the liveness maps are.
 	if lc, _ := f.Cache.Liveness.(*livenessFuncCache); lc == nil {
 		// Prep the cache so liveness can fill it later.
 		f.Cache.Liveness = new(livenessFuncCache)
@@ -505,7 +542,8 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 		if cap(lc.be) >= f.NumBlocks() {
 			lv.be = lc.be[:f.NumBlocks()]
 		}
-		lv.livenessMap = LivenessMap{lc.livenessMap.m[:0]}
+		lv.livenessMap = LivenessMap{lc.livenessMap.vals}
+		lc.livenessMap.vals = nil
 	}
 	if lv.be == nil {
 		lv.be = make([]BlockEffects, f.NumBlocks())
@@ -522,7 +560,7 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
 		be.livein = varRegVec{vars: bulk.next()}
 		be.liveout = varRegVec{vars: bulk.next()}
 	}
-	lv.livenessMap.reset(lv.f.NumValues())
+	lv.livenessMap.reset()
 	lv.markUnsafePoints()

 	return lv
@@ -644,9 +682,18 @@ func (lv *Liveness) pointerMap(liveout bvec, vars []*Node, args, locals bvec) {
 // markUnsafePoints finds unsafe points and computes lv.unsafePoints.
 func (lv *Liveness) markUnsafePoints() {
+	// The runtime assumes the only safe-points are function
+	// prologues (because that's how it used to be). We could and
+	// should improve that, but for now keep consider all points
+	// in the runtime unsafe. obj will add prologues and their
+	// safe-points.
+	//
+	// go:nosplit functions are similar. Since safe points used to
+	// be coupled with stack checks, go:nosplit often actually
+	// means "no safe points in this function".
 	if compiling_runtime || lv.f.NoSplit {
-		// No complex analysis necessary. Do this on the fly
-		// in issafepoint.
+		// No complex analysis necessary.
+		lv.allUnsafe = true
 		return
 	}
@@ -801,20 +848,28 @@
 	}
 }

-// Returns true for instructions that are safe points that must be annotated
-// with liveness information.
-func (lv *Liveness) issafepoint(v *ssa.Value) bool {
-	// The runtime was written with the assumption that
-	// safe-points only appear at call sites (because that's how
-	// it used to be). We could and should improve that, but for
-	// now keep the old safe-point rules in the runtime.
-	//
-	// go:nosplit functions are similar. Since safe points used to
-	// be coupled with stack checks, go:nosplit often actually
-	// means "no safe points in this function".
-	if compiling_runtime || lv.f.NoSplit {
-		return v.Op.IsCall()
+// Returns true for instructions that must have a stack map.
+//
+// This does not necessarily mean the instruction is a safe-point. In
+// particular, call Values can have a stack map in case the callee
+// grows the stack, but not themselves be a safe-point.
+func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
+	// The runtime only has safe-points in function prologues, so
+	// we only need stack maps at call sites. go:nosplit functions
+	// are similar.
+	if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit {
+		if !v.Op.IsCall() {
+			return false
+		}
+		// typedmemclr and typedmemmove are write barriers and
+		// deeply non-preemptible. They are unsafe points and
+		// hence should not have liveness maps.
+		if sym, _ := v.Aux.(*obj.LSym); sym == typedmemclr || sym == typedmemmove {
+			return false
+		}
+		return true
 	}
 	switch v.Op {
 	case ssa.OpInitMem, ssa.OpArg, ssa.OpSP, ssa.OpSB,
 		ssa.OpSelect0, ssa.OpSelect1, ssa.OpGetG,
@@ -1049,7 +1104,7 @@ func (lv *Liveness) epilogue() {
 		// Walk forward through the basic block instructions and
 		// allocate liveness maps for those instructions that need them.
 		for _, v := range b.Values {
-			if !lv.issafepoint(v) {
+			if !lv.hasStackMap(v) {
 				continue
 			}
@@ -1064,7 +1119,7 @@
 		for i := len(b.Values) - 1; i >= 0; i-- {
 			v := b.Values[i]

-			if lv.issafepoint(v) {
+			if lv.hasStackMap(v) {
 				// Found an interesting instruction, record the
 				// corresponding liveness information.
@@ -1113,7 +1168,7 @@
 		// of the context register, so it's dead after the call.
 		index = int32(firstBitmapIndex)
 		for _, v := range b.Values {
-			if lv.issafepoint(v) {
+			if lv.hasStackMap(v) {
 				live := lv.livevars[index]
 				if v.Op.IsCall() && live.regs != 0 {
 					lv.printDebug()
@@ -1139,13 +1194,15 @@
 			lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
 		}
 	}
-	// Check that no registers are live at function entry.
-	// The context register, if any, comes from a
-	// LoweredGetClosurePtr operation first thing in the function,
-	// so it doesn't appear live at entry.
-	if regs := lv.regMaps[0]; regs != 0 {
-		lv.printDebug()
-		lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
+	if !go115ReduceLiveness {
+		// Check that no registers are live at function entry.
+		// The context register, if any, comes from a
+		// LoweredGetClosurePtr operation first thing in the function,
+		// so it doesn't appear live at entry.
+		if regs := lv.regMaps[0]; regs != 0 {
+			lv.printDebug()
+			lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
+		}
 	}
 }
@@ -1166,7 +1223,7 @@
 // PCDATA tables cost about 100k. So for now we keep using a single index for
 // both bitmap lists.
 func (lv *Liveness) compact(b *ssa.Block) {
-	add := func(live varRegVec) LivenessIndex {
+	add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness
 		// Deduplicate the stack map.
 		stackIndex := lv.stackMapSet.add(live.vars)
 		// Deduplicate the register map.
@@ -1176,17 +1233,33 @@
 			lv.regMapSet[live.regs] = regIndex
 			lv.regMaps = append(lv.regMaps, live.regs)
 		}
-		return LivenessIndex{stackIndex, regIndex}
+		return LivenessIndex{stackIndex, regIndex, isUnsafePoint}
 	}
 	pos := 0
 	if b == lv.f.Entry {
 		// Handle entry stack map.
-		add(lv.livevars[0])
+		if !go115ReduceLiveness {
+			add(lv.livevars[0], false)
+		} else {
+			lv.stackMapSet.add(lv.livevars[0].vars)
+		}
 		pos++
 	}
 	for _, v := range b.Values {
-		if lv.issafepoint(v) {
-			lv.livenessMap.set(v, add(lv.livevars[pos]))
+		if go115ReduceLiveness {
+			hasStackMap := lv.hasStackMap(v)
+			isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
+			idx := LivenessIndex{StackMapDontCare, 0, isUnsafePoint}
+			if hasStackMap {
+				idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
+				pos++
+			}
+			if hasStackMap || isUnsafePoint {
+				lv.livenessMap.set(v, idx)
+			}
+		} else if lv.hasStackMap(v) {
+			isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
+			lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint))
 			pos++
 		}
 	}
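The add closure dedups bitmaps so that many instructions share one PCDATA index. A generic sketch of that interning pattern, with strings standing in for bvec bitmaps (hypothetical names):

    package main

    import "fmt"

    type bitmapSet struct {
    	index map[string]int
    	maps  []string
    }

    func (s *bitmapSet) add(bits string) int {
    	if i, ok := s.index[bits]; ok {
    		return i // identical liveness bitmap: reuse its index
    	}
    	i := len(s.maps)
    	s.index[bits] = i
    	s.maps = append(s.maps, bits)
    	return i
    }

    func main() {
    	s := &bitmapSet{index: map[string]int{}}
    	fmt.Println(s.add("1010"), s.add("0110"), s.add("1010")) // 0 1 0
    }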
@@ -1291,7 +1364,6 @@ func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, re
 func (lv *Liveness) printDebug() {
 	fmt.Printf("liveness: %s\n", lv.fn.funcname())

-	pcdata := 0
 	for i, b := range lv.f.Blocks {
 		if i > 0 {
 			fmt.Printf("\n")
@@ -1327,7 +1399,7 @@
 		// program listing, with individual effects listed

 		if b == lv.f.Entry {
-			live := lv.stackMaps[pcdata]
+			live := lv.stackMaps[0]
 			fmt.Printf("(%s) function entry\n", linestr(lv.fn.Func.Nname.Pos))
 			fmt.Printf("\tlive=")
 			printed = false
@@ -1347,9 +1419,7 @@
 		for _, v := range b.Values {
 			fmt.Printf("(%s) %v\n", linestr(v.Pos), v.LongString())

-			if pos := lv.livenessMap.Get(v); pos.Valid() {
-				pcdata = pos.stackMapIndex
-			}
+			pcdata := lv.livenessMap.Get(v)

 			pos, effect := lv.valueEffects(v)
 			regUevar, regKill := lv.regEffects(v)
@@ -1360,31 +1430,38 @@
 				fmt.Printf("\n")
 			}

-			if !lv.issafepoint(v) {
-				continue
-			}
-
-			live := lv.stackMaps[pcdata]
-			fmt.Printf("\tlive=")
-			printed = false
-			for j, n := range lv.vars {
-				if !live.Get(int32(j)) {
-					continue
+			if pcdata.StackMapValid() || pcdata.RegMapValid() {
+				fmt.Printf("\tlive=")
+				printed = false
+				if pcdata.StackMapValid() {
+					live := lv.stackMaps[pcdata.stackMapIndex]
+					for j, n := range lv.vars {
+						if !live.Get(int32(j)) {
+							continue
+						}
+						if printed {
+							fmt.Printf(",")
+						}
+						fmt.Printf("%v", n)
+						printed = true
+					}
 				}
-				if printed {
-					fmt.Printf(",")
+				if pcdata.RegMapValid() { // only if !go115ReduceLiveness
+					regLive := lv.regMaps[pcdata.regMapIndex]
+					if regLive != 0 {
+						if printed {
+							fmt.Printf(",")
+						}
+						fmt.Printf("%s", regLive.niceString(lv.f.Config))
+						printed = true
+					}
 				}
-				fmt.Printf("%v", n)
-				printed = true
+				fmt.Printf("\n")
 			}
-			regLive := lv.regMaps[lv.livenessMap.Get(v).regMapIndex]
-			if regLive != 0 {
-				if printed {
-					fmt.Printf(",")
-				}
-				fmt.Printf("%s", regLive.niceString(lv.f.Config))
+
+			if pcdata.isUnsafePoint {
+				fmt.Printf("\tunsafe-point\n")
 			}
-			fmt.Printf("\n")
 		}

 		// bb bitsets
@@ -1453,19 +1530,21 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
 		loff = dbvec(&liveSymTmp, loff, locals)
 	}

-	regs := bvalloc(lv.usedRegs())
-	roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
-	roff = duint32(&regsSymTmp, roff, uint32(regs.n))        // number of bits in each bitmap
-	if regs.n > 32 {
-		// Our uint32 conversion below won't work.
-		Fatalf("GP registers overflow uint32")
-	}
-
-	if regs.n > 0 {
-		for _, live := range lv.regMaps {
-			regs.Clear()
-			regs.b[0] = uint32(live)
-			roff = dbvec(&regsSymTmp, roff, regs)
+	if !go115ReduceLiveness {
+		regs := bvalloc(lv.usedRegs())
+		roff := duint32(&regsSymTmp, 0, uint32(len(lv.regMaps))) // number of bitmaps
+		roff = duint32(&regsSymTmp, roff, uint32(regs.n))        // number of bits in each bitmap
+		if regs.n > 32 {
+			// Our uint32 conversion below won't work.
+			Fatalf("GP registers overflow uint32")
+		}
+
+		if regs.n > 0 {
+			for _, live := range lv.regMaps {
+				regs.Clear()
+				regs.b[0] = uint32(live)
+				roff = dbvec(&regsSymTmp, roff, regs)
+			}
 		}
 	}
@@ -1480,7 +1559,11 @@
 		lsym.P = tmpSym.P
 	})
 	}
-	return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
+	if !go115ReduceLiveness {
+		return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
+	}
+	// TODO(go115ReduceLiveness): Remove regsSym result
+	return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil
 }

 // Entry pointer for liveness analysis. Solves for the liveness of
@@ -1500,7 +1583,7 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
 		lv.showlive(nil, lv.stackMaps[0])
 		for _, b := range f.Blocks {
 			for _, val := range b.Values {
-				if idx := lv.livenessMap.Get(val); idx.Valid() {
+				if idx := lv.livenessMap.Get(val); idx.StackMapValid() {
 					lv.showlive(val, lv.stackMaps[idx.stackMapIndex])
 				}
 			}
@@ -1519,7 +1602,7 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
 			}
 			cache.be = lv.be
 		}
-		if cap(lv.livenessMap.m) < 2000 {
+		if len(lv.livenessMap.vals) < 2000 {
 			cache.livenessMap = lv.livenessMap
 		}
 	}
@@ -1540,11 +1623,13 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
 	p.To.Name = obj.NAME_EXTERN
 	p.To.Sym = ls.Func.GCLocals

-	p = pp.Prog(obj.AFUNCDATA)
-	Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
-	p.To.Type = obj.TYPE_MEM
-	p.To.Name = obj.NAME_EXTERN
-	p.To.Sym = ls.Func.GCRegs
+	if !go115ReduceLiveness {
+		p = pp.Prog(obj.AFUNCDATA)
+		Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
+		p.To.Type = obj.TYPE_MEM
+		p.To.Name = obj.NAME_EXTERN
+		p.To.Sym = ls.Func.GCRegs
+	}

 	return lv.livenessMap
 }

View file

@@ -82,6 +82,13 @@ func (v *bottomUpVisitor) visit(n *Node) uint32 {
 				min = m
 			}
 		}
+	case OCALLPART:
+		fn := asNode(callpartMethod(n).Type.Nname())
+		if fn != nil && fn.Op == ONAME && fn.Class() == PFUNC && fn.Name.Defn != nil {
+			if m := v.visit(fn.Name.Defn); m < min {
+				min = m
+			}
+		}
 	case OCLOSURE:
 		if m := v.visit(n.Func.Closure); m < min {
 			min = m
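An example of the shape the new OCALLPART case recognizes (illustrative, not from the CL): the method value t.M must pull T.M's body into the bottom-up analysis, since calling the returned closure reaches it.

    package main

    type T struct{}

    func (T) M() {} // now visited via the OCALLPART edge

    func f() func() {
    	var t T
    	return t.M // method value: f depends on T.M
    }

    func main() { f()() }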

View file

@@ -71,7 +71,7 @@ func (s *InitSchedule) staticcopy(l *Node, r *Node) bool {
 		return false
 	}
 	if r.Class() == PFUNC {
-		gdata(l, r, Widthptr)
+		pfuncsym(l, r)
 		return true
 	}
 	if r.Class() != PEXTERN || r.Sym.Pkg != localpkg {
@@ -107,13 +107,12 @@
 		if isZero(r) {
 			return true
 		}
-		gdata(l, r, int(l.Type.Width))
+		litsym(l, r, int(l.Type.Width))
 		return true

 	case OADDR:
-		switch r.Left.Op {
-		case ONAME:
-			gdata(l, r, int(l.Type.Width))
+		if a := r.Left; a.Op == ONAME {
+			addrsym(l, a)
 			return true
 		}
@@ -121,21 +120,14 @@
 		switch r.Left.Op {
 		case OARRAYLIT, OSLICELIT, OSTRUCTLIT, OMAPLIT:
 			// copy pointer
-			gdata(l, nod(OADDR, s.inittemps[r], nil), int(l.Type.Width))
+			addrsym(l, s.inittemps[r])
 			return true
 		}

 	case OSLICELIT:
 		// copy slice
 		a := s.inittemps[r]
-
-		n := l.copy()
-		n.Xoffset = l.Xoffset + int64(slice_array)
-		gdata(n, nod(OADDR, a, nil), Widthptr)
-		n.Xoffset = l.Xoffset + int64(slice_nel)
-		gdata(n, r.Right, Widthptr)
-		n.Xoffset = l.Xoffset + int64(slice_cap)
-		gdata(n, r.Right, Widthptr)
+		slicesym(l, a, r.Right.Int64())
 		return true

 	case OARRAYLIT, OSTRUCTLIT:
@@ -147,7 +139,7 @@
 			n.Xoffset = l.Xoffset + e.Xoffset
 			n.Type = e.Expr.Type
 			if e.Expr.Op == OLITERAL {
-				gdata(n, e.Expr, int(n.Type.Width))
+				litsym(n, e.Expr, int(n.Type.Width))
 				continue
 			}
 			ll := n.sepcopy()
@@ -182,15 +174,13 @@ func (s *InitSchedule) staticassign(l *Node, r *Node) bool {
 		if isZero(r) {
 			return true
 		}
-		gdata(l, r, int(l.Type.Width))
+		litsym(l, r, int(l.Type.Width))
 		return true

 	case OADDR:
 		var nam Node
 		if stataddr(&nam, r.Left) {
-			n := *r
-			n.Left = &nam
-			gdata(l, &n, int(l.Type.Width))
+			addrsym(l, &nam)
 			return true
 		}
 		fallthrough
@@ -202,7 +192,7 @@
 			a := staticname(r.Left.Type)
 			s.inittemps[r] = a
-			gdata(l, nod(OADDR, a, nil), int(l.Type.Width))
+			addrsym(l, a)
 			// Init underlying literal.
 			if !s.staticassign(a, r.Left) {
@@ -215,7 +205,7 @@
 	case OSTR2BYTES:
 		if l.Class() == PEXTERN && r.Left.Op == OLITERAL {
 			sval := strlit(r.Left)
-			slicebytes(l, sval, len(sval))
+			slicebytes(l, sval)
 			return true
 		}
@@ -224,16 +214,10 @@
 		// Init slice.
 		bound := r.Right.Int64()
 		ta := types.NewArray(r.Type.Elem(), bound)
+		ta.SetNoalg(true)
 		a := staticname(ta)
 		s.inittemps[r] = a
-		n := l.copy()
-		n.Xoffset = l.Xoffset + int64(slice_array)
-		gdata(n, nod(OADDR, a, nil), Widthptr)
-		n.Xoffset = l.Xoffset + int64(slice_nel)
-		gdata(n, r.Right, Widthptr)
-		n.Xoffset = l.Xoffset + int64(slice_cap)
-		gdata(n, r.Right, Widthptr)
+		slicesym(l, a, bound)
 		// Fall through to init underlying array.
 		l = a
 		fallthrough
@@ -248,7 +232,7 @@
 			n.Xoffset = l.Xoffset + e.Xoffset
 			n.Type = e.Expr.Type
 			if e.Expr.Op == OLITERAL {
-				gdata(n, e.Expr, int(n.Type.Width))
+				litsym(n, e.Expr, int(n.Type.Width))
 				continue
 			}
 			setlineno(e.Expr)
@@ -270,7 +254,7 @@
 			}
 			// Closures with no captured variables are globals,
 			// so the assignment can be done at link time.
-			gdata(l, r.Func.Closure.Func.Nname, Widthptr)
+			pfuncsym(l, r.Func.Closure.Func.Nname)
 			return true
 		}
 		closuredebugruntimecheck(r)
@@ -304,7 +288,7 @@
 		n := l.copy()

 		// Emit itab, advance offset.
-		gdata(n, itab, Widthptr)
+		addrsym(n, itab.Left) // itab is an OADDR node
 		n.Xoffset += int64(Widthptr)

 		// Emit data.
@@ -327,9 +311,7 @@
 			if !s.staticassign(a, val) {
 				s.append(nod(OAS, a, val))
 			}
-			ptr := nod(OADDR, a, nil)
-			n.Type = types.NewPtr(val.Type)
-			gdata(n, ptr, Widthptr)
+			addrsym(n, a)
 		}

 		return true
@@ -610,18 +592,7 @@ func slicelit(ctxt initContext, n *Node, var_ *Node, init *Nodes) {
 		if !stataddr(&nam, var_) || nam.Class() != PEXTERN {
 			Fatalf("slicelit: %v", var_)
 		}
-
-		var v Node
-		v.Type = types.Types[TINT]
-		setintconst(&v, t.NumElem())
-
-		nam.Xoffset += int64(slice_array)
-		gdata(&nam, nod(OADDR, vstat, nil), Widthptr)
-		nam.Xoffset += int64(slice_nel) - int64(slice_array)
-		gdata(&nam, &v, Widthptr)
-		nam.Xoffset += int64(slice_cap) - int64(slice_nel)
-		gdata(&nam, &v, Widthptr)
-
+		slicesym(&nam, vstat, t.NumElem())
 		return
 	}
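slicesym emits the three words of a static slice header (pointer, length, capacity) in one call, replacing the per-word gdata sequence. A user-level illustration of that layout, assuming the conventional ptr/len/cap header representation (unsafe and implementation-dependent):

    package main

    import (
    	"fmt"
    	"unsafe"
    )

    var backing [3]int
    var s = backing[:]

    func main() {
    	// ptr, len, cap occupy three consecutive words, the same triple
    	// slicesym materializes into the data section.
    	words := (*[3]uintptr)(unsafe.Pointer(&s))
    	fmt.Println(words[1], words[2]) // 3 3
    }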
@@ -789,7 +760,9 @@ func maplit(n *Node, m *Node, init *Nodes) {
 	tk := types.NewArray(n.Type.Key(), int64(len(entries)))
 	te := types.NewArray(n.Type.Elem(), int64(len(entries)))

-	// TODO(josharian): suppress alg generation for these types?
+	tk.SetNoalg(true)
+	te.SetNoalg(true)
+
 	dowidth(tk)
 	dowidth(te)
@@ -1179,10 +1152,10 @@ func genAsStatic(as *Node) {
 	switch {
 	case as.Right.Op == OLITERAL:
+		litsym(&nam, as.Right, int(as.Right.Type.Width))
 	case as.Right.Op == ONAME && as.Right.Class() == PFUNC:
+		pfuncsym(&nam, as.Right)
 	default:
 		Fatalf("genAsStatic: rhs %v", as.Right)
 	}
-	gdata(&nam, as.Right, int(as.Right.Type.Width))
 }

View file

@@ -339,7 +339,7 @@ func buildssa(fn *Node, worker int) *ssa.Func {
 	s.softFloat = s.config.SoftFloat

 	if printssa {
-		s.f.HTMLWriter = ssa.NewHTMLWriter(ssaDumpFile, s.f.Frontend(), name, ssaDumpCFG)
+		s.f.HTMLWriter = ssa.NewHTMLWriter(ssaDumpFile, s.f, ssaDumpCFG)
 		// TODO: generate and print a mapping from nodes to values and blocks
 		dumpSourcesColumn(s.f.HTMLWriter, fn)
 		s.f.HTMLWriter.WriteAST("AST", astBuf)
@@ -394,7 +394,7 @@
 		// For this value, AuxInt is initialized to zero by default
 		startDeferBits := s.entryNewValue0(ssa.OpConst8, types.Types[TUINT8])
 		s.vars[&deferBitsVar] = startDeferBits
-		s.deferBitsAddr = s.addr(deferBitsTemp, false)
+		s.deferBitsAddr = s.addr(deferBitsTemp)
 		s.store(types.Types[TUINT8], s.deferBitsAddr, startDeferBits)
 		// Make sure that the deferBits stack slot is kept alive (for use
 		// by panics) and stores to deferBits are not eliminated, even if
@@ -471,7 +471,7 @@ func dumpSourcesColumn(writer *ssa.HTMLWriter, fn *Node) {
 	fname := Ctxt.PosTable.Pos(fn.Pos).Filename()
 	targetFn, err := readFuncLines(fname, fn.Pos.Line(), fn.Func.Endlineno.Line())
 	if err != nil {
-		writer.Logger.Logf("cannot read sources for function %v: %v", fn, err)
+		writer.Logf("cannot read sources for function %v: %v", fn, err)
 	}

 	// Read sources of inlined functions.
@@ -487,7 +487,7 @@
 		fname := Ctxt.PosTable.Pos(fi.Pos).Filename()
 		fnLines, err := readFuncLines(fname, fi.Pos.Line(), elno.Line())
 		if err != nil {
-			writer.Logger.Logf("cannot read sources for function %v: %v", fi, err)
+			writer.Logf("cannot read sources for inlined function %v: %v", fi, err)
 			continue
 		}
 		inlFns = append(inlFns, fnLines)
@@ -1246,7 +1246,7 @@ func (s *state) stmt(n *Node) {
 			if rhs == nil {
 				r = nil // Signal assign to use OpZero.
 			} else {
-				r = s.addr(rhs, false)
+				r = s.addr(rhs)
 			}
 		} else {
 			if rhs == nil {
@@ -1742,9 +1742,6 @@ var opToSSA = map[opAndType]ssa.Op{
 	opAndType{OLT, TFLOAT64}: ssa.OpLess64F,
 	opAndType{OLT, TFLOAT32}: ssa.OpLess32F,

-	opAndType{OGT, TFLOAT64}: ssa.OpGreater64F,
-	opAndType{OGT, TFLOAT32}: ssa.OpGreater32F,
-
 	opAndType{OLE, TINT8}:    ssa.OpLeq8,
 	opAndType{OLE, TUINT8}:   ssa.OpLeq8U,
 	opAndType{OLE, TINT16}:   ssa.OpLeq16,
@@ -1755,9 +1752,6 @@
 	opAndType{OLE, TUINT64}:  ssa.OpLeq64U,
 	opAndType{OLE, TFLOAT64}: ssa.OpLeq64F,
 	opAndType{OLE, TFLOAT32}: ssa.OpLeq32F,
-
-	opAndType{OGE, TFLOAT64}: ssa.OpGeq64F,
-	opAndType{OGE, TFLOAT32}: ssa.OpGeq32F,
 }

 func (s *state) concreteEtype(t *types.Type) types.EType {
@@ -2014,10 +2008,10 @@ func (s *state) expr(n *Node) *ssa.Value {
 		if s.canSSA(n) {
 			return s.variable(n, n.Type)
 		}
-		addr := s.addr(n, false)
+		addr := s.addr(n)
 		return s.load(n.Type, addr)
 	case OCLOSUREVAR:
-		addr := s.addr(n, false)
+		addr := s.addr(n)
 		return s.load(n.Type, addr)
 	case OLITERAL:
 		switch u := n.Val().U.(type) {
@@ -2345,11 +2339,8 @@
 				s.Fatalf("ordered complex compare %v", n.Op)
 			}
 		}
-		if n.Left.Type.IsFloat() {
-			return s.newValueOrSfCall2(s.ssaOp(n.Op, n.Left.Type), types.Types[TBOOL], a, b)
-		}
-
-		// Integer: convert OGE and OGT into OLE and OLT.
+		// Convert OGE and OGT into OLE and OLT.
 		op := n.Op
 		switch op {
 		case OGE:
@@ -2357,6 +2348,11 @@
 		case OGT:
 			op, a, b = OLT, b, a
 		}
+		if n.Left.Type.IsFloat() {
+			// float comparison
+			return s.newValueOrSfCall2(s.ssaOp(op, n.Left.Type), types.Types[TBOOL], a, b)
+		}
+		// integer comparison
 		return s.newValue2(s.ssaOp(op, n.Left.Type), types.Types[TBOOL], a, b)
 	case OMUL:
 		a := s.expr(n.Left)
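Moving the float check below the OGE/OGT swap works because operand swapping is valid for floating point as well, NaNs included: x > y and y < x agree (both are false when either operand is NaN), which is what lets the Greater and Geq ops be dropped. For example:

    package main

    import (
    	"fmt"
    	"math"
    )

    func main() {
    	nan := math.NaN()
    	fmt.Println(2.0 > nan, nan < 2.0) // false false: the swap preserves NaN semantics
    	fmt.Println(2.0 > 1.0, 1.0 < 2.0) // true true
    }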
@@ -2546,14 +2542,14 @@
 		return s.expr(n.Left)

 	case OADDR:
-		return s.addr(n.Left, n.Bounded())
+		return s.addr(n.Left)

 	case ORESULT:
 		addr := s.constOffPtrSP(types.NewPtr(n.Type), n.Xoffset)
 		return s.load(n.Type, addr)

 	case ODEREF:
-		p := s.exprPtr(n.Left, false, n.Pos)
+		p := s.exprPtr(n.Left, n.Bounded(), n.Pos)
 		return s.load(n.Type, p)

 	case ODOT:
@@ -2571,14 +2567,14 @@
 		// prevents false memory dependencies in race/msan
 		// instrumentation.
 		if islvalue(n) && !s.canSSA(n) {
-			p := s.addr(n, false)
+			p := s.addr(n)
 			return s.load(n.Type, p)
 		}
 		v := s.expr(n.Left)
 		return s.newValue1I(ssa.OpStructSelect, n.Type, int64(fieldIdx(n)), v)

 	case ODOTPTR:
-		p := s.exprPtr(n.Left, false, n.Pos)
+		p := s.exprPtr(n.Left, n.Bounded(), n.Pos)
 		p = s.newValue1I(ssa.OpOffPtr, types.NewPtr(n.Type), n.Xoffset, p)
 		return s.load(n.Type, p)
@@ -2604,7 +2600,7 @@
 			}
 			return s.load(types.Types[TUINT8], ptr)
 		case n.Left.Type.IsSlice():
-			p := s.addr(n, false)
+			p := s.addr(n)
 			return s.load(n.Left.Type.Elem(), p)
 		case n.Left.Type.IsArray():
 			if canSSAType(n.Left.Type) {
@@ -2624,7 +2620,7 @@
 				s.boundsCheck(i, len, ssa.BoundsIndex, n.Bounded()) // checks i == 0
 				return s.newValue1I(ssa.OpArraySelect, n.Type, 0, a)
 			}
-			p := s.addr(n, false)
+			p := s.addr(n)
 			return s.load(n.Left.Type.Elem(), p)
 		default:
 			s.Fatalf("bad type for index %v", n.Left.Type)
@@ -2790,7 +2786,7 @@ func (s *state) append(n *Node, inplace bool) *ssa.Value {
 	var slice, addr *ssa.Value
 	if inplace {
-		addr = s.addr(sn, false)
+		addr = s.addr(sn)
 		slice = s.load(n.Type, addr)
 	} else {
 		slice = s.expr(sn)
@@ -2834,7 +2830,7 @@
 			// Tell liveness we're about to build a new slice
 			s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, sn, s.mem())
 		}
-		capaddr := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.IntPtr, int64(slice_cap), addr)
+		capaddr := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.IntPtr, sliceCapOffset, addr)
 		s.store(types.Types[TINT], capaddr, r[2])
 		s.store(pt, addr, r[0])
 		// load the value we just stored to avoid having to spill it
@@ -2855,7 +2851,7 @@
 	if inplace {
 		l = s.variable(&lenVar, types.Types[TINT]) // generates phi for len
 		nl = s.newValue2(s.ssaOp(OADD, types.Types[TINT]), types.Types[TINT], l, s.constInt(types.Types[TINT], nargs))
-		lenaddr := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.IntPtr, int64(slice_nel), addr)
+		lenaddr := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.IntPtr, sliceLenOffset, addr)
 		s.store(types.Types[TINT], lenaddr, nl)
 	}
@@ -2871,7 +2867,7 @@
 		if canSSAType(n.Type) {
 			args = append(args, argRec{v: s.expr(n), store: true})
 		} else {
-			v := s.addr(n, false)
+			v := s.addr(n)
 			args = append(args, argRec{v: v})
 		}
 	}
@@ -3042,7 +3038,7 @@ func (s *state) assign(left *Node, right *ssa.Value, deref bool, skip skipMask)
 	}

 	// Left is not ssa-able. Compute its address.
-	addr := s.addr(left, false)
+	addr := s.addr(left)
 	if isReflectHeaderDataField(left) {
 		// Package unsafe's documentation says storing pointers into
 		// reflect.SliceHeader and reflect.StringHeader's Data fields
@@ -3158,18 +3154,14 @@ func softfloatInit() {
 		ssa.OpDiv32F: sfRtCallDef{sysfunc("fdiv32"), TFLOAT32},
 		ssa.OpDiv64F: sfRtCallDef{sysfunc("fdiv64"), TFLOAT64},

 		ssa.OpEq64F:      sfRtCallDef{sysfunc("feq64"), TBOOL},
 		ssa.OpEq32F:      sfRtCallDef{sysfunc("feq32"), TBOOL},
 		ssa.OpNeq64F:     sfRtCallDef{sysfunc("feq64"), TBOOL},
 		ssa.OpNeq32F:     sfRtCallDef{sysfunc("feq32"), TBOOL},
 		ssa.OpLess64F:    sfRtCallDef{sysfunc("fgt64"), TBOOL},
 		ssa.OpLess32F:    sfRtCallDef{sysfunc("fgt32"), TBOOL},
-		ssa.OpGreater64F: sfRtCallDef{sysfunc("fgt64"), TBOOL},
-		ssa.OpGreater32F: sfRtCallDef{sysfunc("fgt32"), TBOOL},
-		ssa.OpLeq64F:     sfRtCallDef{sysfunc("fge64"), TBOOL},
-		ssa.OpLeq32F:     sfRtCallDef{sysfunc("fge32"), TBOOL},
-		ssa.OpGeq64F:     sfRtCallDef{sysfunc("fge64"), TBOOL},
-		ssa.OpGeq32F:     sfRtCallDef{sysfunc("fge32"), TBOOL},
+		ssa.OpLeq64F:  sfRtCallDef{sysfunc("fge64"), TBOOL},
+		ssa.OpLeq32F:  sfRtCallDef{sysfunc("fge32"), TBOOL},

 		ssa.OpCvt32to32F: sfRtCallDef{sysfunc("fint32to32"), TFLOAT32},
 		ssa.OpCvt32Fto32: sfRtCallDef{sysfunc("f32toint32"), TINT32},
@@ -3285,10 +3277,7 @@ func init() {
 			// Compiler frontend optimizations emit OBYTES2STRTMP nodes
 			// for the backend instead of slicebytetostringtmp calls
 			// when not instrumenting.
-			slice := args[0]
-			ptr := s.newValue1(ssa.OpSlicePtr, s.f.Config.Types.BytePtr, slice)
-			len := s.newValue1(ssa.OpSliceLen, types.Types[TINT], slice)
-			return s.newValue2(ssa.OpStringMake, n.Type, ptr, len)
+			return s.newValue2(ssa.OpStringMake, n.Type, args[0], args[1])
 		},
 		all...)
 }
@@ -3547,7 +3536,7 @@ func init() {
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpSqrt, types.Types[TFLOAT64], args[0])
 		},
-		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.S390X, sys.Wasm)
+		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
 	addF("math", "Trunc",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			return s.newValue1(ssa.OpTrunc, types.Types[TFLOAT64], args[0])
@@ -3595,8 +3584,7 @@
 				s.vars[n] = s.load(types.Types[TFLOAT64], a)
 				return s.variable(n, types.Types[TFLOAT64])
 			}
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasFMA, s.sb)
-			v := s.load(types.Types[TBOOL], addr)
+			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasFMA)
 			b := s.endBlock()
 			b.Kind = ssa.BlockIf
 			b.SetControl(v)
@@ -3661,8 +3649,7 @@
 	makeRoundAMD64 := func(op ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasSSE41, s.sb)
-			v := s.load(types.Types[TBOOL], addr)
+			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasSSE41)
 			b := s.endBlock()
 			b.Kind = ssa.BlockIf
 			b.SetControl(v)
@@ -3869,8 +3856,7 @@
 	makeOnesCountAMD64 := func(op64 ssa.Op, op32 ssa.Op) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), x86HasPOPCNT, s.sb)
-			v := s.load(types.Types[TBOOL], addr)
+			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[TBOOL], x86HasPOPCNT)
 			b := s.endBlock()
 			b.Kind = ssa.BlockIf
 			b.SetControl(v)
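These feature tests guard intrinsic selection. A user-visible example of code that compiles into such a branch on amd64, assuming the POPCNT path described above (bits.OnesCount64 branches on x86HasPOPCNT, now modeled as OpHasCPUFeature):

    package main

    import (
    	"fmt"
    	"math/bits"
    )

    func main() {
    	// On amd64 this lowers to: if hasPOPCNT { POPCNT } else { generic fallback }.
    	fmt.Println(bits.OnesCount64(0b1011)) // 3
    }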
@@ -4229,7 +4215,7 @@ func (s *state) openDeferSave(n *Node, t *types.Type, val *ssa.Value) *ssa.Value
 		argTemp.Name.SetNeedzero(true)
 	}
 	if !canSSA {
-		a := s.addr(n, false)
+		a := s.addr(n)
 		s.move(t, addrArgTemp, a)
 		return addrArgTemp
 	}
@@ -4401,7 +4387,7 @@ func (s *state) call(n *Node, k callKind) *ssa.Value {
 		d := tempAt(n.Pos, s.curfn, t)

 		s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, d, s.mem())
-		addr := s.addr(d, false)
+		addr := s.addr(d)

 		// Must match reflect.go:deferstruct and src/runtime/runtime2.go:_defer.
 		// 0: siz
@@ -4592,9 +4578,7 @@ func etypesign(e types.EType) int8 {
 // addr converts the address of the expression n to SSA, adds it to s and returns the SSA result.
 // The value that the returned Value represents is guaranteed to be non-nil.
-// If bounded is true then this address does not require a nil check for its operand
-// even if that would otherwise be implied.
-func (s *state) addr(n *Node, bounded bool) *ssa.Value {
+func (s *state) addr(n *Node) *ssa.Value {
 	if n.Op != ONAME {
 		s.pushLine(n.Pos)
 		defer s.popLine()
@@ -4647,25 +4631,25 @@ func (s *state) addr(n *Node, bounded bool) *ssa.Value {
 			p := s.newValue1(ssa.OpSlicePtr, t, a)
 			return s.newValue2(ssa.OpPtrIndex, t, p, i)
 		} else { // array
-			a := s.addr(n.Left, bounded)
+			a := s.addr(n.Left)
 			i := s.expr(n.Right)
 			len := s.constInt(types.Types[TINT], n.Left.Type.NumElem())
 			i = s.boundsCheck(i, len, ssa.BoundsIndex, n.Bounded())
 			return s.newValue2(ssa.OpPtrIndex, types.NewPtr(n.Left.Type.Elem()), a, i)
 		}
 	case ODEREF:
-		return s.exprPtr(n.Left, bounded, n.Pos)
+		return s.exprPtr(n.Left, n.Bounded(), n.Pos)
 	case ODOT:
-		p := s.addr(n.Left, bounded)
+		p := s.addr(n.Left)
 		return s.newValue1I(ssa.OpOffPtr, t, n.Xoffset, p)
 	case ODOTPTR:
-		p := s.exprPtr(n.Left, bounded, n.Pos)
+		p := s.exprPtr(n.Left, n.Bounded(), n.Pos)
 		return s.newValue1I(ssa.OpOffPtr, t, n.Xoffset, p)
 	case OCLOSUREVAR:
 		return s.newValue1I(ssa.OpOffPtr, t, n.Xoffset,
 			s.entryNewValue0(ssa.OpGetClosurePtr, s.f.Config.Types.BytePtr))
 	case OCONVNOP:
-		addr := s.addr(n.Left, bounded)
+		addr := s.addr(n.Left)
 		return s.newValue1(ssa.OpCopy, t, addr) // ensure that addr has the right type
 	case OCALLFUNC, OCALLINTER, OCALLMETH:
 		return s.call(n, callNormal)
@@ -5090,7 +5074,7 @@ func (s *state) storeArgWithBase(n *Node, t *types.Type, base *ssa.Value, off in
 	}

 	if !canSSAType(t) {
-		a := s.addr(n, false)
+		a := s.addr(n)
 		s.move(t, addr, a)
 		return
 	}
@@ -5644,7 +5628,7 @@
 		// TODO: get rid of some of these temporaries.
 		tmp = tempAt(n.Pos, s.curfn, n.Type)
 		s.vars[&memVar] = s.newValue1A(ssa.OpVarDef, types.TypeMem, tmp, s.mem())
-		addr = s.addr(tmp, false)
+		addr = s.addr(tmp)
 	}

 	cond := s.newValue2(ssa.OpEqPtr, types.Types[TBOOL], itab, targetITab)
@@ -6027,7 +6011,7 @@ func genssa(f *ssa.Func, pp *Progs) {
 		// instruction. We won't use the actual liveness map on a
 		// control instruction. Just mark it something that is
 		// preemptible.
-		s.pp.nextLive = LivenessIndex{-1, -1}
+		s.pp.nextLive = LivenessIndex{-1, -1, false}

 		// Emit values in block
 		thearch.SSAMarkMoves(&s, b)
@@ -6360,20 +6344,6 @@ func (s *SSAGenState) FPJump(b, next *ssa.Block, jumps *[2][2]FloatingEQNEJump)
 	}
 }

-func AuxOffset(v *ssa.Value) (offset int64) {
-	if v.Aux == nil {
-		return 0
-	}
-	n, ok := v.Aux.(*Node)
-	if !ok {
-		v.Fatalf("bad aux type in %s\n", v.LongString())
-	}
-	if n.Class() == PAUTO {
-		return n.Xoffset
-	}
-	return 0
-}
-
 // AddAux adds the offset in the aux fields (AuxInt and Aux) of v to a.
 func AddAux(a *obj.Addr, v *ssa.Value) {
 	AddAux2(a, v, v.AuxInt)
@@ -6601,10 +6571,8 @@ func (s *SSAGenState) Call(v *ssa.Value) *obj.Prog {
 // since it emits PCDATA for the stack map at the call (calls are safe points).
 func (s *SSAGenState) PrepareCall(v *ssa.Value) {
 	idx := s.livenessMap.Get(v)
-	if !idx.Valid() {
-		// typedmemclr and typedmemmove are write barriers and
-		// deeply non-preemptible. They are unsafe points and
-		// hence should not have liveness maps.
+	if !idx.StackMapValid() {
+		// See Liveness.hasStackMap.
 		if sym, _ := v.Aux.(*obj.LSym); !(sym == typedmemclr || sym == typedmemmove) {
 			Fatalf("missing stack map index for %v", v.LongString())
 		}
@@ -6672,21 +6640,21 @@ func fieldIdx(n *Node) int {
 // It also exports a bunch of compiler services for the ssa backend.
 type ssafn struct {
 	curfn        *Node
-	strings      map[string]interface{} // map from constant string to data symbols
+	strings      map[string]*obj.LSym // map from constant string to data symbols
 	scratchFpMem *Node                // temp for floating point register / memory moves on some architectures
 	stksize      int64                // stack size for current frame
 	stkptrsize   int64                // prefix of stack containing pointers
 	log          bool                 // print ssa debug to the stdout
 }

-// StringData returns a symbol (a *types.Sym wrapped in an interface) which
-// is the data component of a global string constant containing s.
-func (e *ssafn) StringData(s string) interface{} {
+// StringData returns a symbol which
+// is the data component of a global string constant containing s.
+func (e *ssafn) StringData(s string) *obj.LSym {
 	if aux, ok := e.strings[s]; ok {
 		return aux
 	}
 	if e.strings == nil {
-		e.strings = make(map[string]interface{})
+		e.strings = make(map[string]*obj.LSym)
 	}
 	data := stringsym(e.curfn.Pos, s)
 	e.strings[s] = data

View file

@@ -376,7 +376,13 @@ func newnamel(pos src.XPos, s *types.Sym) *Node {
 // nodSym makes a Node with Op op and with the Left field set to left
 // and the Sym field set to sym. This is for ODOT and friends.
 func nodSym(op Op, left *Node, sym *types.Sym) *Node {
-	n := nod(op, left, nil)
+	return nodlSym(lineno, op, left, sym)
+}
+
+// nodlSym makes a Node with position Pos, with Op op, and with the Left field set to left
+// and the Sym field set to sym. This is for ODOT and friends.
+func nodlSym(pos src.XPos, op Op, left *Node, sym *types.Sym) *Node {
+	n := nodl(pos, op, left, nil)
 	n.Sym = sym
 	return n
 }
@@ -923,6 +929,21 @@ func (o Op) IsSlice3() bool {
 	return false
 }

+// slicePtrLen extracts the pointer and length from a slice.
+// This constructs two nodes referring to n, so n must be a cheapexpr.
+func (n *Node) slicePtrLen() (ptr, len *Node) {
+	var init Nodes
+	c := cheapexpr(n, &init)
+	if c != n || init.Len() != 0 {
+		Fatalf("slicePtrLen not cheap: %v", n)
+	}
+	ptr = nod(OSPTR, n, nil)
+	ptr.Type = n.Type.Elem().PtrTo()
+	len = nod(OLEN, n, nil)
+	len.Type = types.Types[TINT]
+	return ptr, len
+}
+
 // labeledControl returns the control flow Node (for, switch, select)
 // associated with the label n, if any.
 func (n *Node) labeledControl() *Node {
@@ -1881,18 +1902,21 @@ func itabType(itab *Node) *Node {
 // ifaceData loads the data field from an interface.
 // The concrete type must be known to have type t.
 // It follows the pointer if !isdirectiface(t).
-func ifaceData(n *Node, t *types.Type) *Node {
-	ptr := nodSym(OIDATA, n, nil)
+func ifaceData(pos src.XPos, n *Node, t *types.Type) *Node {
+	if t.IsInterface() {
+		Fatalf("ifaceData interface: %v", t)
+	}
+	ptr := nodlSym(pos, OIDATA, n, nil)
 	if isdirectiface(t) {
 		ptr.Type = t
 		ptr.SetTypecheck(1)
 		return ptr
 	}
 	ptr.Type = types.NewPtr(t)
-	ptr.SetBounded(true)
 	ptr.SetTypecheck(1)
-	ind := nod(ODEREF, ptr, nil)
+	ind := nodl(pos, ODEREF, ptr, nil)
 	ind.Type = t
 	ind.SetTypecheck(1)
+	ind.SetBounded(true)
 	return ind
 }

View file

@@ -540,10 +540,14 @@ func walkTypeSwitch(sw *Node) {
 			caseVar = ncase.Rlist.First()
 		}

-		// For single-type cases, we initialize the case
-		// variable as part of the type assertion; but in
-		// other cases, we initialize it in the body.
-		singleType := ncase.List.Len() == 1 && ncase.List.First().Op == OTYPE
+		// For single-type cases with an interface type,
+		// we initialize the case variable as part of the type assertion.
+		// In other cases, we initialize it in the body.
+		var singleType *types.Type
+		if ncase.List.Len() == 1 && ncase.List.First().Op == OTYPE {
+			singleType = ncase.List.First().Type
+		}
+		caseVarInitialized := false

 		label := autolabel(".s")
 		jmp := npos(ncase.Pos, nodSym(OGOTO, nil, label))
@@ -564,18 +568,27 @@
 				continue
 			}

-			if singleType {
-				s.Add(n1.Type, caseVar, jmp)
+			if singleType != nil && singleType.IsInterface() {
+				s.Add(ncase.Pos, n1.Type, caseVar, jmp)
+				caseVarInitialized = true
 			} else {
-				s.Add(n1.Type, nil, jmp)
+				s.Add(ncase.Pos, n1.Type, nil, jmp)
 			}
 		}

 		body.Append(npos(ncase.Pos, nodSym(OLABEL, nil, label)))
-		if caseVar != nil && !singleType {
+		if caseVar != nil && !caseVarInitialized {
+			val := s.facename
+			if singleType != nil {
+				// We have a single concrete type. Extract the data.
+				if singleType.IsInterface() {
+					Fatalf("singleType interface should have been handled in Add")
+				}
+				val = ifaceData(ncase.Pos, s.facename, singleType)
+			}
 			l := []*Node{
 				nodl(ncase.Pos, ODCL, caseVar, nil),
-				nodl(ncase.Pos, OAS, caseVar, s.facename),
+				nodl(ncase.Pos, OAS, caseVar, val),
 			}
 			typecheckslice(l, ctxStmt)
 			body.Append(l...)
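In source terms, the distinction walkTypeSwitch now draws looks like this (an illustrative example, not from the CL):

    package main

    import "fmt"

    func classify(x interface{}) {
    	switch v := x.(type) {
    	case int:
    		// Single concrete type: v is initialized straight from the
    		// interface's data word via ifaceData, not by re-asserting.
    		fmt.Println("int:", v)
    	case fmt.Stringer:
    		// Interface type: still initialized as part of the assertion
    		// in typeSwitch.Add.
    		fmt.Println("stringer:", v.String())
    	default:
    		fmt.Println("other:", v)
    	}
    }

    func main() { classify(7) }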
@@ -616,12 +629,12 @@ type typeClause struct {
 	body Nodes
 }

-func (s *typeSwitch) Add(typ *types.Type, caseVar *Node, jmp *Node) {
+func (s *typeSwitch) Add(pos src.XPos, typ *types.Type, caseVar, jmp *Node) {
 	var body Nodes
 	if caseVar != nil {
 		l := []*Node{
-			nod(ODCL, caseVar, nil),
-			nod(OAS, caseVar, nil),
+			nodl(pos, ODCL, caseVar, nil),
+			nodl(pos, OAS, caseVar, nil),
 		}
 		typecheckslice(l, ctxStmt)
 		body.Append(l...)
@@ -630,9 +643,9 @@
 	}

 	// cv, ok = iface.(type)
-	as := nod(OAS2, nil, nil)
+	as := nodl(pos, OAS2, nil, nil)
 	as.List.Set2(caseVar, s.okname) // cv, ok =
-	dot := nod(ODOTTYPE, s.facename, nil)
+	dot := nodl(pos, ODOTTYPE, s.facename, nil)
 	dot.Type = typ // iface.(type)
 	as.Rlist.Set1(dot)
 	as = typecheck(as, ctxStmt)
@@ -640,7 +653,7 @@
 	body.Append(as)

 	// if ok { goto label }
-	nif := nod(OIF, nil, nil)
+	nif := nodl(pos, OIF, nil, nil)
 	nif.Left = s.okname
 	nif.Nbody.Set1(jmp)
 	body.Append(nif)

View file

@ -8,7 +8,6 @@ package gc
import ( import (
"cmd/compile/internal/ssa" "cmd/compile/internal/ssa"
"cmd/compile/internal/syntax"
"cmd/compile/internal/types" "cmd/compile/internal/types"
"cmd/internal/obj" "cmd/internal/obj"
"cmd/internal/objabi" "cmd/internal/objabi"
@ -188,15 +187,39 @@ func (n *Node) SetImplicit(b bool) { n.flags.set(nodeImplicit, b) }
func (n *Node) SetIsDDD(b bool) { n.flags.set(nodeIsDDD, b) } func (n *Node) SetIsDDD(b bool) { n.flags.set(nodeIsDDD, b) }
func (n *Node) SetDiag(b bool) { n.flags.set(nodeDiag, b) } func (n *Node) SetDiag(b bool) { n.flags.set(nodeDiag, b) }
func (n *Node) SetColas(b bool) { n.flags.set(nodeColas, b) } func (n *Node) SetColas(b bool) { n.flags.set(nodeColas, b) }
func (n *Node) SetNonNil(b bool) { n.flags.set(nodeNonNil, b) }
func (n *Node) SetTransient(b bool) { n.flags.set(nodeTransient, b) } func (n *Node) SetTransient(b bool) { n.flags.set(nodeTransient, b) }
func (n *Node) SetBounded(b bool) { n.flags.set(nodeBounded, b) }
func (n *Node) SetHasCall(b bool) { n.flags.set(nodeHasCall, b) } func (n *Node) SetHasCall(b bool) { n.flags.set(nodeHasCall, b) }
func (n *Node) SetLikely(b bool) { n.flags.set(nodeLikely, b) } func (n *Node) SetLikely(b bool) { n.flags.set(nodeLikely, b) }
func (n *Node) SetHasVal(b bool) { n.flags.set(nodeHasVal, b) } func (n *Node) SetHasVal(b bool) { n.flags.set(nodeHasVal, b) }
func (n *Node) SetHasOpt(b bool) { n.flags.set(nodeHasOpt, b) } func (n *Node) SetHasOpt(b bool) { n.flags.set(nodeHasOpt, b) }
func (n *Node) SetEmbedded(b bool) { n.flags.set(nodeEmbedded, b) } func (n *Node) SetEmbedded(b bool) { n.flags.set(nodeEmbedded, b) }
// MarkNonNil marks a pointer n as being guaranteed non-nil,
// on all code paths, at all times.
// During conversion to SSA, non-nil pointers won't have nil checks
// inserted before dereferencing. See state.exprPtr.
func (n *Node) MarkNonNil() {
if !n.Type.IsPtr() && !n.Type.IsUnsafePtr() {
Fatalf("MarkNonNil(%v), type %v", n, n.Type)
}
n.flags.set(nodeNonNil, true)
}
// SetBounded indicates whether operation n does not need safety checks.
// When n is an index or slice operation, n does not need bounds checks.
// When n is a dereferencing operation, n does not need nil checks.
func (n *Node) SetBounded(b bool) {
switch n.Op {
case OINDEX, OSLICE, OSLICEARR, OSLICE3, OSLICE3ARR, OSLICESTR:
// No bounds checks needed.
case ODOTPTR, ODEREF:
// No nil check needed.
default:
Fatalf("SetBounded(%v)", n)
}
n.flags.set(nodeBounded, b)
}
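A hedged example of where the front end can assert boundedness itself (illustrative): when walking a range loop, the compiler synthesizes the element load, and the loop condition already guarantees the index is in range, so that node can be marked bounded instead of relying on the SSA prove pass.

// The element load generated for this loop uses an index the walker knows
// is in range, so it can carry SetBounded(true) and compile without a
// bounds check.
func sum(s []int) int {
	t := 0
	for _, v := range s {
		t += v
	}
	return t
}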
// MarkReadonly indicates that n is an ONAME with readonly contents. // MarkReadonly indicates that n is an ONAME with readonly contents.
func (n *Node) MarkReadonly() { func (n *Node) MarkReadonly() {
if n.Op != ONAME { if n.Op != ONAME {
@ -311,6 +334,10 @@ func (n *Node) pkgFuncName() string {
return p + "." + s.Name return p + "." + s.Name
} }
// The compiler needs *Node to be assignable to cmd/compile/internal/ssa.Sym.
func (n *Node) CanBeAnSSASym() {
}
// Name holds Node fields used only by named nodes (ONAME, OTYPE, OPACK, OLABEL, some OLITERAL). // Name holds Node fields used only by named nodes (ONAME, OTYPE, OPACK, OLABEL, some OLITERAL).
type Name struct { type Name struct {
Pack *Node // real package for import . names Pack *Node // real package for import . names
@ -455,7 +482,7 @@ type Param struct {
// OTYPE // OTYPE
// //
// TODO: Should Func pragmas also be stored on the Name? // TODO: Should Func pragmas also be stored on the Name?
Pragma syntax.Pragma Pragma PragmaFlag
Alias bool // node is alias for Ntype (only used when type-checking ODCLTYPE) Alias bool // node is alias for Ntype (only used when type-checking ODCLTYPE)
} }
@ -537,7 +564,7 @@ type Func struct {
Endlineno src.XPos Endlineno src.XPos
WBPos src.XPos // position of first write barrier; see SetWBPos WBPos src.XPos // position of first write barrier; see SetWBPos
Pragma syntax.Pragma // go:xxx function annotations Pragma PragmaFlag // go:xxx function annotations
flags bitset16 flags bitset16
numDefers int // number of defer calls in the function numDefers int // number of defer calls in the function
@ -659,10 +686,8 @@ const (
// OCALLFUNC, OCALLMETH, and OCALLINTER have the same structure. // OCALLFUNC, OCALLMETH, and OCALLINTER have the same structure.
// Prior to walk, they are: Left(List), where List is all regular arguments. // Prior to walk, they are: Left(List), where List is all regular arguments.
// If present, Right is an ODDDARG that holds the
// generated slice used in a call to a variadic function.
// After walk, List is a series of assignments to temporaries, // After walk, List is a series of assignments to temporaries,
// and Rlist is an updated set of arguments, including any ODDDARG slice. // and Rlist is an updated set of arguments.
// TODO(josharian/khr): Use Ninit instead of List for the assignments to temporaries. See CL 114797. // TODO(josharian/khr): Use Ninit instead of List for the assignments to temporaries. See CL 114797.
OCALLFUNC // Left(List/Rlist) (function call f(args)) OCALLFUNC // Left(List/Rlist) (function call f(args))
OCALLMETH // Left(List/Rlist) (direct method call x.Method(args)) OCALLMETH // Left(List/Rlist) (direct method call x.Method(args))
@ -790,7 +815,6 @@ const (
// misc // misc
ODDD // func f(args ...int) or f(l...) or var a = [...]int{0, 1, 2}. ODDD // func f(args ...int) or f(l...) or var a = [...]int{0, 1, 2}.
ODDDARG // func f(args ...int), introduced by escape analysis.
OINLCALL // intermediary representation of an inlined call. OINLCALL // intermediary representation of an inlined call.
OEFACE // itable and data words of an empty-interface value. OEFACE // itable and data words of an empty-interface value.
OITAB // itable word of an interface value. OITAB // itable word of an interface value.

View file

@ -342,13 +342,13 @@ func typeinit() {
simtype[TFUNC] = TPTR simtype[TFUNC] = TPTR
simtype[TUNSAFEPTR] = TPTR simtype[TUNSAFEPTR] = TPTR
slice_array = int(Rnd(0, int64(Widthptr))) slicePtrOffset = 0
slice_nel = int(Rnd(int64(slice_array)+int64(Widthptr), int64(Widthptr))) sliceLenOffset = Rnd(slicePtrOffset+int64(Widthptr), int64(Widthptr))
slice_cap = int(Rnd(int64(slice_nel)+int64(Widthptr), int64(Widthptr))) sliceCapOffset = Rnd(sliceLenOffset+int64(Widthptr), int64(Widthptr))
sizeof_Slice = int(Rnd(int64(slice_cap)+int64(Widthptr), int64(Widthptr))) sizeofSlice = Rnd(sliceCapOffset+int64(Widthptr), int64(Widthptr))
// string is same as slice w/o the cap // string is same as slice w/o the cap
sizeof_String = int(Rnd(int64(slice_nel)+int64(Widthptr), int64(Widthptr))) sizeofString = Rnd(sliceLenOffset+int64(Widthptr), int64(Widthptr))
dowidth(types.Types[TSTRING]) dowidth(types.Types[TSTRING])
dowidth(types.Idealstring) dowidth(types.Idealstring)

View file

@ -81,18 +81,6 @@ func walkstmtlist(s []*Node) {
} }
} }
func samelist(a, b []*Node) bool {
if len(a) != len(b) {
return false
}
for i, n := range a {
if n != b[i] {
return false
}
}
return true
}
func paramoutheap(fn *Node) bool { func paramoutheap(fn *Node) bool {
for _, ln := range fn.Func.Dcl { for _, ln := range fn.Func.Dcl {
switch ln.Class() { switch ln.Class() {
@ -295,16 +283,6 @@ func walkstmt(n *Node) *Node {
Fatalf("expected %v return arguments, have %v", want, got) Fatalf("expected %v return arguments, have %v", want, got)
} }
if samelist(rl, n.List.Slice()) {
// special return in disguise
// TODO(josharian, 1.12): is "special return" still relevant?
// Tests still pass w/o this. See comments on https://go-review.googlesource.com/c/go/+/118318
walkexprlist(n.List.Slice(), &n.Ninit)
n.List.Set(nil)
break
}
// move function calls out, to make reorder3's job easier. // move function calls out, to make reorder3's job easier.
walkexprlistsafe(n.List.Slice(), &n.Ninit) walkexprlistsafe(n.List.Slice(), &n.Ninit)
@ -479,7 +457,7 @@ func walkexpr(n *Node, init *Nodes) *Node {
nn := nod(ODEREF, n.Name.Param.Heapaddr, nil) nn := nod(ODEREF, n.Name.Param.Heapaddr, nil)
nn = typecheck(nn, ctxExpr) nn = typecheck(nn, ctxExpr)
nn = walkexpr(nn, init) nn = walkexpr(nn, init)
nn.Left.SetNonNil(true) nn.Left.MarkNonNil()
return nn return nn
} }
@ -784,7 +762,7 @@ opswitch:
if !a.isBlank() { if !a.isBlank() {
var_ := temp(types.NewPtr(t.Elem())) var_ := temp(types.NewPtr(t.Elem()))
var_.SetTypecheck(1) var_.SetTypecheck(1)
var_.SetNonNil(true) // mapaccess always returns a non-nil pointer var_.MarkNonNil() // mapaccess always returns a non-nil pointer
n.List.SetFirst(var_) n.List.SetFirst(var_)
n = walkexpr(n, init) n = walkexpr(n, init)
init.Append(n) init.Append(n)
@ -862,7 +840,6 @@ opswitch:
n.Left = cheapexpr(n.Left, init) n.Left = cheapexpr(n.Left, init)
// byteindex widens n.Left so that the multiplication doesn't overflow. // byteindex widens n.Left so that the multiplication doesn't overflow.
index := nod(OLSH, byteindex(n.Left), nodintconst(3)) index := nod(OLSH, byteindex(n.Left), nodintconst(3))
index.SetBounded(true)
if thearch.LinkArch.ByteOrder == binary.BigEndian { if thearch.LinkArch.ByteOrder == binary.BigEndian {
index = nod(OADD, index, nodintconst(7)) index = nod(OADD, index, nodintconst(7))
} }
@ -908,7 +885,7 @@ opswitch:
init.Append(nif) init.Append(nif)
// Build the result. // Build the result.
e := nod(OEFACE, tmp, ifaceData(c, types.NewPtr(types.Types[TUINT8]))) e := nod(OEFACE, tmp, ifaceData(n.Pos, c, types.NewPtr(types.Types[TUINT8])))
e.Type = toType // assign type manually, typecheck doesn't understand OEFACE. e.Type = toType // assign type manually, typecheck doesn't understand OEFACE.
e.SetTypecheck(1) e.SetTypecheck(1)
n = e n = e
@ -1126,7 +1103,7 @@ opswitch:
} }
} }
n.Type = types.NewPtr(t.Elem()) n.Type = types.NewPtr(t.Elem())
n.SetNonNil(true) // mapaccess1* and mapassign always return non-nil pointers. n.MarkNonNil() // mapaccess1* and mapassign always return non-nil pointers.
n = nod(ODEREF, n, nil) n = nod(ODEREF, n, nil)
n.Type = t.Elem() n.Type = t.Elem()
n.SetTypecheck(1) n.SetTypecheck(1)
@ -1247,12 +1224,23 @@ opswitch:
// are stored with an indirection. So max bucket size is 2048+eps. // are stored with an indirection. So max bucket size is 2048+eps.
if !Isconst(hint, CTINT) || if !Isconst(hint, CTINT) ||
hint.Val().U.(*Mpint).CmpInt64(BUCKETSIZE) <= 0 { hint.Val().U.(*Mpint).CmpInt64(BUCKETSIZE) <= 0 {
// In case hint is larger than BUCKETSIZE, runtime.makemap
// will allocate the buckets on the heap; see #20184.
//
// if hint <= BUCKETSIZE {
// var bv bmap
// b = &bv
// h.buckets = b
// }
nif := nod(OIF, nod(OLE, hint, nodintconst(BUCKETSIZE)), nil)
nif.SetLikely(true)
// var bv bmap // var bv bmap
bv := temp(bmap(t)) bv := temp(bmap(t))
zero = nod(OAS, bv, nil) zero = nod(OAS, bv, nil)
zero = typecheck(zero, ctxStmt) nif.Nbody.Append(zero)
init.Append(zero)
// b = &bv // b = &bv
b := nod(OADDR, bv, nil) b := nod(OADDR, bv, nil)
@ -1260,8 +1248,11 @@ opswitch:
// h.buckets = b // h.buckets = b
bsym := hmapType.Field(5).Sym // hmap.buckets see reflect.go:hmap bsym := hmapType.Field(5).Sym // hmap.buckets see reflect.go:hmap
na := nod(OAS, nodSym(ODOT, h, bsym), b) na := nod(OAS, nodSym(ODOT, h, bsym), b)
na = typecheck(na, ctxStmt) nif.Nbody.Append(na)
init.Append(na)
nif = typecheck(nif, ctxStmt)
nif = walkstmt(nif)
init.Append(nif)
} }
} }
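At the source level (an illustrative sketch), this guard matters for makes with a non-constant hint: only hints that fit in a single bucket may use the stack-allocated bmap, and that is now decided at run time rather than unconditionally in the generated code.

// With a non-constant hint the compiler cannot prove hint <= BUCKETSIZE (8),
// so the bucket setup above is emitted under the runtime guard.
func build(n int) map[int]string {
	m := make(map[int]string, n)
	for i := 0; i < n; i++ {
		m[i] = "x"
	}
	return m
}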
@ -1338,7 +1329,8 @@ opswitch:
Fatalf("walkexpr: invalid index %v", r) Fatalf("walkexpr: invalid index %v", r)
} }
// cap is constrained to [0,2^31), so it's safe to do: // cap is constrained to [0,2^31) or [0,2^63) depending on whether
// we're on a 32-bit or 64-bit system. So it's safe to do:
// //
// if uint64(len) > cap { // if uint64(len) > cap {
// if len < 0 { panicmakeslicelen() } // if len < 0 { panicmakeslicelen() }
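Spelled out as ordinary Go (a sketch), the single unsigned comparison covers both failure modes, because a negative len converts to a huge uint64:

// One unsigned compare handles both "len < 0" and "len > cap":
// uint64 of a negative length always exceeds any valid cap.
func checkMakeSlice(length, capacity int64) {
	if uint64(length) > uint64(capacity) {
		if length < 0 {
			panic("makeslice: len out of range")
		}
		panic("makeslice: cap out of range")
	}
}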
@ -1390,7 +1382,7 @@ opswitch:
fn := syslook(fnname) fn := syslook(fnname)
m.Left = mkcall1(fn, types.Types[TUNSAFEPTR], init, typename(t.Elem()), conv(len, argtype), conv(cap, argtype)) m.Left = mkcall1(fn, types.Types[TUNSAFEPTR], init, typename(t.Elem()), conv(len, argtype), conv(cap, argtype))
m.Left.SetNonNil(true) m.Left.MarkNonNil()
m.List.Set2(conv(len, types.Types[TINT]), conv(cap, types.Types[TINT])) m.List.Set2(conv(len, types.Types[TINT]), conv(cap, types.Types[TINT]))
m = typecheck(m, ctxExpr) m = typecheck(m, ctxExpr)
@ -1414,13 +1406,15 @@ opswitch:
t := types.NewArray(types.Types[TUINT8], tmpstringbufsize) t := types.NewArray(types.Types[TUINT8], tmpstringbufsize)
a = nod(OADDR, temp(t), nil) a = nod(OADDR, temp(t), nil)
} }
fn := "slicebytetostring"
if n.Op == ORUNES2STR { if n.Op == ORUNES2STR {
fn = "slicerunetostring" // slicerunetostring(*[32]byte, []rune) string
n = mkcall("slicerunetostring", n.Type, init, a, n.Left)
} else {
// slicebytetostring(*[32]byte, ptr *byte, n int) string
n.Left = cheapexpr(n.Left, init)
ptr, len := n.Left.slicePtrLen()
n = mkcall("slicebytetostring", n.Type, init, a, ptr, len)
} }
// slicebytetostring(*[32]byte, []byte) string
// slicerunetostring(*[32]byte, []rune) string
n = mkcall(fn, n.Type, init, a, n.Left)
case OBYTES2STRTMP: case OBYTES2STRTMP:
n.Left = walkexpr(n.Left, init) n.Left = walkexpr(n.Left, init)
@ -1429,8 +1423,10 @@ opswitch:
// to avoid a function call to slicebytetostringtmp. // to avoid a function call to slicebytetostringtmp.
break break
} }
// slicebytetostringtmp([]byte) string // slicebytetostringtmp(ptr *byte, n int) string
n = mkcall("slicebytetostringtmp", n.Type, init, n.Left) n.Left = cheapexpr(n.Left, init)
ptr, len := n.Left.slicePtrLen()
n = mkcall("slicebytetostringtmp", n.Type, init, ptr, len)
case OSTR2BYTES: case OSTR2BYTES:
s := n.Left s := n.Left
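For reference, the conversions these runtime calls implement (a sketch; the compiler's optional stack buffer is the first argument in both signatures):

// string(b) now lowers to slicebytetostring(buf, ptr, len) with the slice's
// ptr/len passed separately; string(r) still passes the []rune whole.
func conv(b []byte, r []rune) (string, string) {
	return string(b), string(r)
}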
@ -1721,57 +1717,56 @@ func ascompatet(nl Nodes, nr *types.Type) []*Node {
} }
// package all the arguments that match a ... T parameter into a []T. // package all the arguments that match a ... T parameter into a []T.
func mkdotargslice(typ *types.Type, args []*Node, init *Nodes, ddd *Node) *Node { func mkdotargslice(typ *types.Type, args []*Node) *Node {
esc := uint16(EscUnknown) var n *Node
if ddd != nil {
esc = ddd.Esc
}
if len(args) == 0 { if len(args) == 0 {
n := nodnil() n = nodnil()
n.Type = typ n.Type = typ
return n } else {
n = nod(OCOMPLIT, nil, typenod(typ))
n.List.Append(args...)
n.SetImplicit(true)
} }
n := nod(OCOMPLIT, nil, typenod(typ))
if ddd != nil && prealloc[ddd] != nil {
prealloc[n] = prealloc[ddd] // temporary to use
}
n.List.Set(args)
n.Esc = esc
n = typecheck(n, ctxExpr) n = typecheck(n, ctxExpr)
if n.Type == nil { if n.Type == nil {
Fatalf("mkdotargslice: typecheck failed") Fatalf("mkdotargslice: typecheck failed")
} }
n = walkexpr(n, init)
return n return n
} }
// fixVariadicCall rewrites calls to variadic functions to use an
// explicit ... argument if one is not already present.
func fixVariadicCall(call *Node) {
fntype := call.Left.Type
if !fntype.IsVariadic() || call.IsDDD() {
return
}
vi := fntype.NumParams() - 1
vt := fntype.Params().Field(vi).Type
args := call.List.Slice()
extra := args[vi:]
slice := mkdotargslice(vt, extra)
for i := range extra {
extra[i] = nil // allow GC
}
call.List.Set(append(args[:vi], slice))
call.SetIsDDD(true)
}
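A source-level picture of the rewrite (illustrative): fixVariadicCall makes the implicit packaging of trailing arguments explicit, so later phases only ever see the ... form of a variadic call.

package main

import "fmt"

func main() {
	fmt.Println("a", 1, true) // what the user wrote
	// what fixVariadicCall rewrites it to, conceptually:
	fmt.Println([]interface{}{"a", 1, true}...)
}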
func walkCall(n *Node, init *Nodes) { func walkCall(n *Node, init *Nodes) {
if n.Rlist.Len() != 0 { if n.Rlist.Len() != 0 {
return // already walked return // already walked
} }
n.Left = walkexpr(n.Left, init)
walkexprlist(n.List.Slice(), init)
params := n.Left.Type.Params() params := n.Left.Type.Params()
args := n.List.Slice() args := n.List.Slice()
// If there's a ... parameter (which is only valid as the final
// parameter) and this is not a ... call expression, n.Left = walkexpr(n.Left, init)
// then assign the remaining arguments as a slice. walkexprlist(args, init)
if nf := params.NumFields(); nf > 0 {
if last := params.Field(nf - 1); last.IsDDD() && !n.IsDDD() {
// The callsite does not use a ..., but the called function is declared
// with a final argument that has a ... . Build the slice that we will
// pass as the ... argument.
tail := args[nf-1:]
slice := mkdotargslice(last.Type, tail, init, n.Right)
// Allow immediate GC.
for i := range tail {
tail[i] = nil
}
args = append(args[:nf-1], slice)
}
}
// If this is a method call, add the receiver at the beginning of the args. // If this is a method call, add the receiver at the beginning of the args.
if n.Op == OCALLMETH { if n.Op == OCALLMETH {
@ -1956,7 +1951,7 @@ func callnew(t *types.Type) *Node {
n := nod(ONEWOBJ, typename(t), nil) n := nod(ONEWOBJ, typename(t), nil)
n.Type = types.NewPtr(t) n.Type = types.NewPtr(t)
n.SetTypecheck(1) n.SetTypecheck(1)
n.SetNonNil(true) n.MarkNonNil()
return n return n
} }
@ -2645,6 +2640,8 @@ func appendslice(n *Node, init *Nodes) *Node {
l1 := n.List.First() l1 := n.List.First()
l2 := n.List.Second() l2 := n.List.Second()
l2 = cheapexpr(l2, init)
n.List.SetSecond(l2)
var nodes Nodes var nodes Nodes
@ -2682,35 +2679,45 @@ func appendslice(n *Node, init *Nodes) *Node {
if elemtype.HasHeapPointer() { if elemtype.HasHeapPointer() {
// copy(s[len(l1):], l2) // copy(s[len(l1):], l2)
nptr1 := nod(OSLICE, s, nil) nptr1 := nod(OSLICE, s, nil)
nptr1.Type = s.Type
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil) nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
nptr1 = cheapexpr(nptr1, &nodes)
nptr2 := l2 nptr2 := l2
Curfn.Func.setWBPos(n.Pos) Curfn.Func.setWBPos(n.Pos)
// instantiate typedslicecopy(typ *type, dst any, src any) int // instantiate typedslicecopy(typ *type, dstPtr *any, dstLen int, srcPtr *any, srcLen int) int
fn := syslook("typedslicecopy") fn := syslook("typedslicecopy")
fn = substArgTypes(fn, l1.Type, l2.Type) fn = substArgTypes(fn, l1.Type.Elem(), l2.Type.Elem())
ncopy = mkcall1(fn, types.Types[TINT], &nodes, typename(elemtype), nptr1, nptr2) ptr1, len1 := nptr1.slicePtrLen()
ptr2, len2 := nptr2.slicePtrLen()
ncopy = mkcall1(fn, types.Types[TINT], &nodes, typename(elemtype), ptr1, len1, ptr2, len2)
} else if instrumenting && !compiling_runtime { } else if instrumenting && !compiling_runtime {
// rely on runtime to instrument copy. // rely on runtime to instrument copy.
// copy(s[len(l1):], l2) // copy(s[len(l1):], l2)
nptr1 := nod(OSLICE, s, nil) nptr1 := nod(OSLICE, s, nil)
nptr1.Type = s.Type
nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil) nptr1.SetSliceBounds(nod(OLEN, l1, nil), nil, nil)
nptr1 = cheapexpr(nptr1, &nodes)
nptr2 := l2 nptr2 := l2
if l2.Type.IsString() { if l2.Type.IsString() {
// instantiate func slicestringcopy(to any, fr any) int // instantiate func slicestringcopy(toPtr *byte, toLen int, fr string) int
fn := syslook("slicestringcopy") fn := syslook("slicestringcopy")
fn = substArgTypes(fn, l1.Type, l2.Type) ptr, len := nptr1.slicePtrLen()
ncopy = mkcall1(fn, types.Types[TINT], &nodes, nptr1, nptr2) str := nod(OCONVNOP, nptr2, nil)
str.Type = types.Types[TSTRING]
ncopy = mkcall1(fn, types.Types[TINT], &nodes, ptr, len, str)
} else { } else {
// instantiate func slicecopy(to any, fr any, wid uintptr) int // instantiate func slicecopy(to any, fr any, wid uintptr) int
fn := syslook("slicecopy") fn := syslook("slicecopy")
fn = substArgTypes(fn, l1.Type, l2.Type) fn = substArgTypes(fn, l1.Type.Elem(), l2.Type.Elem())
ncopy = mkcall1(fn, types.Types[TINT], &nodes, nptr1, nptr2, nodintconst(elemtype.Width)) ptr1, len1 := nptr1.slicePtrLen()
ptr2, len2 := nptr2.slicePtrLen()
ncopy = mkcall1(fn, types.Types[TINT], &nodes, ptr1, len1, ptr2, len2, nodintconst(elemtype.Width))
} }
} else { } else {
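The construct lowered in this hunk, for orientation (a sketch): appending one slice to another, where the tail copy must go through typedslicecopy when the element type carries heap pointers so that write barriers run.

// The tail copy below, copy(s[len(dst):], src), becomes
// typedslicecopy(typ, dstPtr, dstLen, srcPtr, srcLen) after this change.
func appendAll(dst, src []*int) []*int {
	return append(dst, src...)
}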
@ -3009,20 +3016,31 @@ func walkappend(n *Node, init *Nodes, dst *Node) *Node {
func copyany(n *Node, init *Nodes, runtimecall bool) *Node { func copyany(n *Node, init *Nodes, runtimecall bool) *Node {
if n.Left.Type.Elem().HasHeapPointer() { if n.Left.Type.Elem().HasHeapPointer() {
Curfn.Func.setWBPos(n.Pos) Curfn.Func.setWBPos(n.Pos)
fn := writebarrierfn("typedslicecopy", n.Left.Type, n.Right.Type) fn := writebarrierfn("typedslicecopy", n.Left.Type.Elem(), n.Right.Type.Elem())
return mkcall1(fn, n.Type, init, typename(n.Left.Type.Elem()), n.Left, n.Right) n.Left = cheapexpr(n.Left, init)
ptrL, lenL := n.Left.slicePtrLen()
n.Right = cheapexpr(n.Right, init)
ptrR, lenR := n.Right.slicePtrLen()
return mkcall1(fn, n.Type, init, typename(n.Left.Type.Elem()), ptrL, lenL, ptrR, lenR)
} }
if runtimecall { if runtimecall {
if n.Right.Type.IsString() { if n.Right.Type.IsString() {
fn := syslook("slicestringcopy") fn := syslook("slicestringcopy")
fn = substArgTypes(fn, n.Left.Type, n.Right.Type) n.Left = cheapexpr(n.Left, init)
return mkcall1(fn, n.Type, init, n.Left, n.Right) ptr, len := n.Left.slicePtrLen()
str := nod(OCONVNOP, n.Right, nil)
str.Type = types.Types[TSTRING]
return mkcall1(fn, n.Type, init, ptr, len, str)
} }
fn := syslook("slicecopy") fn := syslook("slicecopy")
fn = substArgTypes(fn, n.Left.Type, n.Right.Type) fn = substArgTypes(fn, n.Left.Type.Elem(), n.Right.Type.Elem())
return mkcall1(fn, n.Type, init, n.Left, n.Right, nodintconst(n.Left.Type.Elem().Width)) n.Left = cheapexpr(n.Left, init)
ptrL, lenL := n.Left.slicePtrLen()
n.Right = cheapexpr(n.Right, init)
ptrR, lenR := n.Right.slicePtrLen()
return mkcall1(fn, n.Type, init, ptrL, lenL, ptrR, lenR, nodintconst(n.Left.Type.Elem().Width))
} }
n.Left = walkexpr(n.Left, init) n.Left = walkexpr(n.Left, init)
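The three shapes copyany handles, as user code (illustrative):

func copies(dst, src []byte, s string, pdst, psrc []*int) {
	copy(dst, src)   // pointer-free elements: slicecopy / memmove path
	copy(dst, s)     // string source: slicestringcopy(ptr, len, str)
	copy(pdst, psrc) // pointer elements: typedslicecopy, with write barriers
}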
@ -3145,7 +3163,7 @@ func walkcompare(n *Node, init *Nodes) *Node {
eqtype = nod(andor, nonnil, match) eqtype = nod(andor, nonnil, match)
} }
// Check for data equal. // Check for data equal.
eqdata := nod(eq, ifaceData(l, r.Type), r) eqdata := nod(eq, ifaceData(n.Pos, l, r.Type), r)
// Put it all together. // Put it all together.
expr := nod(andor, eqtype, eqdata) expr := nod(andor, eqtype, eqdata)
n = finishcompare(n, expr, init) n = finishcompare(n, expr, init)
@ -3361,36 +3379,15 @@ func tracecmpArg(n *Node, t *types.Type, init *Nodes) *Node {
} }
func walkcompareInterface(n *Node, init *Nodes) *Node { func walkcompareInterface(n *Node, init *Nodes) *Node {
// ifaceeq(i1 any-1, i2 any-2) (ret bool);
if !types.Identical(n.Left.Type, n.Right.Type) {
Fatalf("ifaceeq %v %v %v", n.Op, n.Left.Type, n.Right.Type)
}
var fn *Node
if n.Left.Type.IsEmptyInterface() {
fn = syslook("efaceeq")
} else {
fn = syslook("ifaceeq")
}
n.Right = cheapexpr(n.Right, init) n.Right = cheapexpr(n.Right, init)
n.Left = cheapexpr(n.Left, init) n.Left = cheapexpr(n.Left, init)
lt := nod(OITAB, n.Left, nil) eqtab, eqdata := eqinterface(n.Left, n.Right)
rt := nod(OITAB, n.Right, nil)
ld := nod(OIDATA, n.Left, nil)
rd := nod(OIDATA, n.Right, nil)
ld.Type = types.Types[TUNSAFEPTR]
rd.Type = types.Types[TUNSAFEPTR]
ld.SetTypecheck(1)
rd.SetTypecheck(1)
call := mkcall1(fn, n.Type, init, lt, ld, rd)
// Check itable/type before full compare.
// Note: short-circuited because order matters.
var cmp *Node var cmp *Node
if n.Op == OEQ { if n.Op == OEQ {
cmp = nod(OANDAND, nod(OEQ, lt, rt), call) cmp = nod(OANDAND, eqtab, eqdata)
} else { } else {
cmp = nod(OOROR, nod(ONE, lt, rt), nod(ONOT, call, nil)) eqtab.Op = ONE
cmp = nod(OOROR, eqtab, nod(ONOT, eqdata, nil))
} }
return finishcompare(n, cmp, init) return finishcompare(n, cmp, init)
} }
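The semantics being compiled (a sketch): equal interface values must first have equal dynamic types, so eqtab short-circuits eqdata and the data words are only compared when the type words match.

package main

import "fmt"

func main() {
	var a interface{} = 1
	var b interface{} = 1.0
	fmt.Println(a == b) // false: type words differ, data never compared
	var c interface{} = 1
	fmt.Println(a == c) // true: same type word, then equal data
}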
@ -3500,27 +3497,16 @@ func walkcompareString(n *Node, init *Nodes) *Node {
// prepare for rewrite below // prepare for rewrite below
n.Left = cheapexpr(n.Left, init) n.Left = cheapexpr(n.Left, init)
n.Right = cheapexpr(n.Right, init) n.Right = cheapexpr(n.Right, init)
eqlen, eqmem := eqstring(n.Left, n.Right)
lstr := conv(n.Left, types.Types[TSTRING])
rstr := conv(n.Right, types.Types[TSTRING])
lptr := nod(OSPTR, lstr, nil)
rptr := nod(OSPTR, rstr, nil)
llen := conv(nod(OLEN, lstr, nil), types.Types[TUINTPTR])
rlen := conv(nod(OLEN, rstr, nil), types.Types[TUINTPTR])
fn := syslook("memequal")
fn = substArgTypes(fn, types.Types[TUINT8], types.Types[TUINT8])
r = mkcall1(fn, types.Types[TBOOL], init, lptr, rptr, llen)
// quick check of len before full compare for == or !=. // quick check of len before full compare for == or !=.
// memequal then tests equality up to length len. // memequal then tests equality up to length len.
if n.Op == OEQ { if n.Op == OEQ {
// len(left) == len(right) && memequal(left, right, len) // len(left) == len(right) && memequal(left, right, len)
r = nod(OANDAND, nod(OEQ, llen, rlen), r) r = nod(OANDAND, eqlen, eqmem)
} else { } else {
// len(left) != len(right) || !memequal(left, right, len) // len(left) != len(right) || !memequal(left, right, len)
r = nod(ONOT, r, nil) eqlen.Op = ONE
r = nod(OOROR, nod(ONE, llen, rlen), r) r = nod(OOROR, eqlen, nod(ONOT, eqmem, nil))
} }
} else { } else {
// sys_cmpstring(s1, s2) :: 0 // sys_cmpstring(s1, s2) :: 0
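Roughly what eqlen/eqmem expand to (a sketch, with a byte loop standing in for the runtime's memequal):

// a == b lowers to len(a) == len(b) && memequal(aPtr, bPtr, len(a));
// for != the lowered form negates each half, as above.
func stringEq(a, b string) bool {
	return len(a) == len(b) && bytesEqual(a, b)
}

// bytesEqual is an illustrative stand-in for runtime.memequal.
func bytesEqual(a, b string) bool {
	for i := 0; i < len(a); i++ {
		if a[i] != b[i] {
			return false
		}
	}
	return true
}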
@ -3639,7 +3625,8 @@ func usemethod(n *Node) {
// Note: Don't rely on res0.Type.String() since its formatting depends on multiple factors // Note: Don't rely on res0.Type.String() since its formatting depends on multiple factors
// (including global variables such as numImports - was issue #19028). // (including global variables such as numImports - was issue #19028).
if s := res0.Type.Sym; s != nil && s.Name == "Method" && s.Pkg != nil && s.Pkg.Path == "reflect" { // Also need to check for reflect package itself (see Issue #38515).
if s := res0.Type.Sym; s != nil && s.Name == "Method" && isReflectPkg(s.Pkg) {
Curfn.Func.SetReflectMethod(true) Curfn.Func.SetReflectMethod(true)
} }
} }
@ -3959,10 +3946,8 @@ func walkCheckPtrArithmetic(n *Node, init *Nodes) *Node {
n = cheapexpr(n, init) n = cheapexpr(n, init)
ddd := nodl(n.Pos, ODDDARG, nil, nil) slice := mkdotargslice(types.NewSlice(types.Types[TUNSAFEPTR]), originals)
ddd.Type = types.NewPtr(types.NewArray(types.Types[TUNSAFEPTR], int64(len(originals)))) slice.Esc = EscNone
ddd.Esc = EscNone
slice := mkdotargslice(types.NewSlice(types.Types[TUNSAFEPTR]), originals, init, ddd)
init.Append(mkcall("checkptrArithmetic", nil, init, convnop(n, types.Types[TUNSAFEPTR]), slice)) init.Append(mkcall("checkptrArithmetic", nil, init, convnop(n, types.Types[TUNSAFEPTR]), slice))
// TODO(khr): Mark backing store of slice as dead. This will allow us to reuse // TODO(khr): Mark backing store of slice as dead. This will allow us to reuse

View file

@ -294,18 +294,23 @@ func checkLogPath(flag, destination string) {
dest = destination dest = destination
} }
var loggedOpts []LoggedOpt var loggedOpts []*LoggedOpt
var mu = sync.Mutex{} // mu protects loggedOpts. var mu = sync.Mutex{} // mu protects loggedOpts.
func NewLoggedOpt(pos src.XPos, what, pass, fname string, args ...interface{}) *LoggedOpt {
pass = strings.Replace(pass, " ", "_", -1)
return &LoggedOpt{pos, pass, fname, what, args}
}
func LogOpt(pos src.XPos, what, pass, fname string, args ...interface{}) { func LogOpt(pos src.XPos, what, pass, fname string, args ...interface{}) {
if Format == None { if Format == None {
return return
} }
pass = strings.Replace(pass, " ", "_", -1) lo := NewLoggedOpt(pos, what, pass, fname, args...)
mu.Lock() mu.Lock()
defer mu.Unlock() defer mu.Unlock()
// Because of concurrent calls from the back end, there is no telling what the order will be, but it is stable-sorted by outer Pos before use. // Because of concurrent calls from the back end, there is no telling what the order will be, but it is stable-sorted by outer Pos before use.
loggedOpts = append(loggedOpts, LoggedOpt{pos, pass, fname, what, args}) loggedOpts = append(loggedOpts, lo)
} }
func Enabled() bool { func Enabled() bool {
@ -321,7 +326,7 @@ func Enabled() bool {
// byPos sorts diagnostics by source position. // byPos sorts diagnostics by source position.
type byPos struct { type byPos struct {
ctxt *obj.Link ctxt *obj.Link
a []LoggedOpt a []*LoggedOpt
} }
func (x byPos) Len() int { return len(x.a) } func (x byPos) Len() int { return len(x.a) }
@ -402,15 +407,9 @@ func FlushLoggedOpts(ctxt *obj.Link, slashPkgPath string) {
// For LSP, make a subdirectory for the package, and for each file foo.go, create foo.json in that subdirectory. // For LSP, make a subdirectory for the package, and for each file foo.go, create foo.json in that subdirectory.
currentFile := "" currentFile := ""
for _, x := range loggedOpts { for _, x := range loggedOpts {
posTmp = ctxt.AllPos(x.pos, posTmp) posTmp, p0 := x.parsePos(ctxt, posTmp)
// Reverse posTmp to put outermost first.
l := len(posTmp)
for i := 0; i < l/2; i++ {
posTmp[i], posTmp[l-i-1] = posTmp[l-i-1], posTmp[i]
}
p0 := posTmp[0]
p0f := uprootedPath(p0.Filename()) p0f := uprootedPath(p0.Filename())
if currentFile != p0f { if currentFile != p0f {
if w != nil { if w != nil {
w.Close() w.Close()
@ -429,16 +428,27 @@ func FlushLoggedOpts(ctxt *obj.Link, slashPkgPath string) {
diagnostic.Code = x.what diagnostic.Code = x.what
diagnostic.Message = target diagnostic.Message = target
diagnostic.Range = Range{Start: Position{p0.Line(), p0.Col()}, diagnostic.Range = newPointRange(p0)
End: Position{p0.Line(), p0.Col()}}
diagnostic.RelatedInformation = diagnostic.RelatedInformation[:0] diagnostic.RelatedInformation = diagnostic.RelatedInformation[:0]
for i := 1; i < l; i++ { appendInlinedPos(posTmp, &diagnostic)
p := posTmp[i]
loc := Location{URI: uriIfy(uprootedPath(p.Filename())), // Diagnostic explanation is stored in RelatedInformation after inlining info
Range: Range{Start: Position{p.Line(), p.Col()}, if len(x.target) > 1 {
End: Position{p.Line(), p.Col()}}} switch y := x.target[1].(type) {
diagnostic.RelatedInformation = append(diagnostic.RelatedInformation, DiagnosticRelatedInformation{Location: loc, Message: "inlineLoc"}) case []*LoggedOpt:
for _, z := range y {
posTmp, p0 := z.parsePos(ctxt, posTmp)
loc := newLocation(p0)
msg := z.what
if len(z.target) > 0 {
msg = msg + ": " + fmt.Sprint(z.target[0])
}
diagnostic.RelatedInformation = append(diagnostic.RelatedInformation, DiagnosticRelatedInformation{Location: loc, Message: msg})
appendInlinedPos(posTmp, &diagnostic)
}
}
} }
encoder.Encode(diagnostic) encoder.Encode(diagnostic)
@ -448,3 +458,33 @@ func FlushLoggedOpts(ctxt *obj.Link, slashPkgPath string) {
} }
} }
} }
func newPointRange(p src.Pos) Range {
return Range{Start: Position{p.Line(), p.Col()},
End: Position{p.Line(), p.Col()}}
}
func newLocation(p src.Pos) Location {
loc := Location{URI: uriIfy(uprootedPath(p.Filename())), Range: newPointRange(p)}
return loc
}
// appendInlinedPos extracts inlining information from posTmp and appends it to diagnostic
func appendInlinedPos(posTmp []src.Pos, diagnostic *Diagnostic) {
for i := 1; i < len(posTmp); i++ {
p := posTmp[i]
loc := newLocation(p)
diagnostic.RelatedInformation = append(diagnostic.RelatedInformation, DiagnosticRelatedInformation{Location: loc, Message: "inlineLoc"})
}
}
func (x *LoggedOpt) parsePos(ctxt *obj.Link, posTmp []src.Pos) ([]src.Pos, src.Pos) {
posTmp = ctxt.AllPos(x.pos, posTmp)
// Reverse posTmp to put outermost first.
l := len(posTmp)
for i := 0; i < l/2; i++ {
posTmp[i], posTmp[l-i-1] = posTmp[l-i-1], posTmp[i]
}
p0 := posTmp[0]
return posTmp, p0
}
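A hedged usage sketch of the refactored API (cmd/compile/internal/logopt is internal to the compiler, and pos/fname here are placeholders): passes record a decision with LogOpt, and a caller can now prebuild nested explanations with NewLoggedOpt and pass them as an extra argument, which FlushLoggedOpts renders as RelatedInformation.

// Illustrative only; this package is not importable outside the compiler.
func logExamples(pos src.XPos, fname string) {
	// Record a single optimization decision.
	logopt.LogOpt(pos, "nilcheck", "genssa", fname)

	// Attach a prebuilt explanation as the second target argument;
	// FlushLoggedOpts recognizes the []*LoggedOpt and emits each entry
	// as RelatedInformation.
	expl := []*logopt.LoggedOpt{
		logopt.NewLoggedOpt(pos, "escflow", "escape", fname, "flow: y = z:"),
	}
	logopt.LogOpt(pos, "leak", "escape", fname, "parameter z leaks", expl)
}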

View file

@ -30,11 +30,28 @@ func foo(w, z *pair) *int {
} }
return &a[0] return &a[0]
} }
// address taking prevents closure inlining
func n() int {
foo := func() int { return 1 }
bar := &foo
x := (*bar)() + foo()
return x
}
` `
func want(t *testing.T, out string, desired string) { func want(t *testing.T, out string, desired string) {
if !strings.Contains(out, desired) { // On Windows, Unicode escapes in the JSON output end up "normalized" elsewhere to /u....,
t.Errorf("did not see phrase %s in \n%s", desired, out) // so "normalize" what we're looking for to match that.
s := strings.ReplaceAll(desired, string(os.PathSeparator), "/")
if !strings.Contains(out, s) {
t.Errorf("did not see phrase %s in \n%s", s, out)
}
}
func wantN(t *testing.T, out string, desired string, n int) {
if strings.Count(out, desired) != n {
t.Errorf("expected exactly %d occurences of %s in \n%s", n, desired, out)
} }
} }
@ -75,7 +92,70 @@ func TestLogOpt(t *testing.T) {
}) })
// replace d (dir) with t ("tmpdir") and convert path separators to '/'
normalize := func(out []byte, d, t string) string {
s := string(out)
s = strings.ReplaceAll(s, d, t)
s = strings.ReplaceAll(s, string(os.PathSeparator), "/")
return s
}
// Ensure that <128 byte copies are not reported and that 128-byte copies are.
// Check at both 1 and 8-byte alignments.
t.Run("Copy", func(t *testing.T) {
const copyCode = `package x
func s128a1(x *[128]int8) [128]int8 {
return *x
}
func s127a1(x *[127]int8) [127]int8 {
return *x
}
func s16a8(x *[16]int64) [16]int64 {
return *x
}
func s15a8(x *[15]int64) [15]int64 {
return *x
}
`
copy := filepath.Join(dir, "copy.go")
if err := ioutil.WriteFile(copy, []byte(copyCode), 0644); err != nil {
t.Fatal(err)
}
outcopy := filepath.Join(dir, "copy.o")
// On not-amd64, test the host architecture and os
arches := []string{runtime.GOARCH}
goos0 := runtime.GOOS
if runtime.GOARCH == "amd64" { // Test many things with "linux" (wasm will get "js")
arches = []string{"arm", "arm64", "386", "amd64", "mips", "mips64", "ppc64le", "riscv64", "s390x", "wasm"}
goos0 = "linux"
}
for _, arch := range arches {
t.Run(arch, func(t *testing.T) {
goos := goos0
if arch == "wasm" {
goos = "js"
}
_, err := testCopy(t, dir, arch, goos, copy, outcopy)
if err != nil {
t.Error("-json=0,file://log/opt should have succeeded")
}
logged, err := ioutil.ReadFile(filepath.Join(dir, "log", "opt", "x", "copy.json"))
if err != nil {
t.Error("-json=0,file://log/opt missing expected log file")
}
slogged := normalize(logged, string(uriIfy(dir)), string(uriIfy("tmpdir")))
t.Logf("%s", slogged)
want(t, slogged, `{"range":{"start":{"line":3,"character":2},"end":{"line":3,"character":2}},"severity":3,"code":"copy","source":"go compiler","message":"128 bytes"}`)
want(t, slogged, `{"range":{"start":{"line":9,"character":2},"end":{"line":9,"character":2}},"severity":3,"code":"copy","source":"go compiler","message":"128 bytes"}`)
wantN(t, slogged, `"code":"copy"`, 2)
})
}
})
// Some architectures don't fault on nil dereference, so nilchecks are eliminated differently. // Some architectures don't fault on nil dereference, so nilchecks are eliminated differently.
// The N-way copy test also doesn't need to run N-ways N times.
if runtime.GOARCH != "amd64" { if runtime.GOARCH != "amd64" {
return return
} }
@ -83,14 +163,6 @@ func TestLogOpt(t *testing.T) {
t.Run("Success", func(t *testing.T) { t.Run("Success", func(t *testing.T) {
// This test is supposed to succeed // This test is supposed to succeed
// replace d (dir) with t ("tmpdir") and convert path separators to '/'
normalize := func(out []byte, d, t string) string {
s := string(out)
s = strings.ReplaceAll(s, d, t)
s = strings.ReplaceAll(s, string(os.PathSeparator), "/")
return s
}
// Note 'file://' is the I-Know-What-I-Am-Doing way of specifying a file, also to deal with corner cases for Windows. // Note 'file://' is the I-Know-What-I-Am-Doing way of specifying a file, also to deal with corner cases for Windows.
_, err := testLogOptDir(t, dir, "-json=0,file://log/opt", src, outfile) _, err := testLogOptDir(t, dir, "-json=0,file://log/opt", src, outfile)
if err != nil { if err != nil {
@ -103,12 +175,26 @@ func TestLogOpt(t *testing.T) {
// All this delicacy with uriIfy and filepath.Join is to get this test to work right on Windows. // All this delicacy with uriIfy and filepath.Join is to get this test to work right on Windows.
slogged := normalize(logged, string(uriIfy(dir)), string(uriIfy("tmpdir"))) slogged := normalize(logged, string(uriIfy(dir)), string(uriIfy("tmpdir")))
t.Logf("%s", slogged) t.Logf("%s", slogged)
// below shows proper inlining and nilcheck // below shows proper nilcheck
want(t, slogged, `{"range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}},"severity":3,"code":"nilcheck","source":"go compiler","message":"","relatedInformation":[{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":4,"character":11},"end":{"line":4,"character":11}}},"message":"inlineLoc"}]}`) want(t, slogged, `{"range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}},"severity":3,"code":"nilcheck","source":"go compiler","message":"",`+
`"relatedInformation":[{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":4,"character":11},"end":{"line":4,"character":11}}},"message":"inlineLoc"}]}`)
want(t, slogged, `{"range":{"start":{"line":11,"character":6},"end":{"line":11,"character":6}},"severity":3,"code":"isInBounds","source":"go compiler","message":""}`) want(t, slogged, `{"range":{"start":{"line":11,"character":6},"end":{"line":11,"character":6}},"severity":3,"code":"isInBounds","source":"go compiler","message":""}`)
want(t, slogged, `{"range":{"start":{"line":7,"character":6},"end":{"line":7,"character":6}},"severity":3,"code":"canInlineFunction","source":"go compiler","message":"cost: 35"}`) want(t, slogged, `{"range":{"start":{"line":7,"character":6},"end":{"line":7,"character":6}},"severity":3,"code":"canInlineFunction","source":"go compiler","message":"cost: 35"}`)
want(t, slogged, `{"range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}},"severity":3,"code":"inlineCall","source":"go compiler","message":"x.bar"}`) want(t, slogged, `{"range":{"start":{"line":21,"character":21},"end":{"line":21,"character":21}},"severity":3,"code":"cannotInlineCall","source":"go compiler","message":"foo cannot be inlined (escaping closure variable)"}`)
want(t, slogged, `{"range":{"start":{"line":8,"character":9},"end":{"line":8,"character":9}},"severity":3,"code":"inlineCall","source":"go compiler","message":"x.bar"}`) // escape analysis explanation
want(t, slogged, `{"range":{"start":{"line":7,"character":13},"end":{"line":7,"character":13}},"severity":3,"code":"leak","source":"go compiler","message":"parameter z leaks to ~r2 with derefs=0",`+
`"relatedInformation":[`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: flow: y = z:"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: from y = \u003cN\u003e (assign-pair)"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: flow: ~r1 = y:"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":4,"character":11},"end":{"line":4,"character":11}}},"message":"inlineLoc"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: from y.b (dot of pointer)"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":4,"character":11},"end":{"line":4,"character":11}}},"message":"inlineLoc"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: from \u0026y.b (address-of)"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":4,"character":9},"end":{"line":4,"character":9}}},"message":"inlineLoc"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":13},"end":{"line":9,"character":13}}},"message":"escflow: from ~r1 = \u003cN\u003e (assign-pair)"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":3},"end":{"line":9,"character":3}}},"message":"escflow: flow: ~r2 = ~r1:"},`+
`{"location":{"uri":"file://tmpdir/file.go","range":{"start":{"line":9,"character":3},"end":{"line":9,"character":3}}},"message":"escflow: from return (*int)(~r1) (return)"}]}`)
}) })
} }
@ -131,3 +217,15 @@ func testLogOptDir(t *testing.T, dir, flag, src, outfile string) (string, error)
t.Logf("%s", out) t.Logf("%s", out)
return string(out), err return string(out), err
} }
func testCopy(t *testing.T, dir, goarch, goos, src, outfile string) (string, error) {
// Notice the specified import path "x"
run := []string{testenv.GoToolPath(t), "tool", "compile", "-p", "x", "-json=0,file://log/opt", "-o", outfile, src}
t.Log(run)
cmd := exec.Command(run[0], run[1:]...)
cmd.Dir = dir
cmd.Env = append(os.Environ(), "GOARCH="+goarch, "GOOS="+goos)
out, err := cmd.CombinedOutput()
t.Logf("%s", out)
return string(out), err
}

View file

@ -571,7 +571,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW, ssa.OpPPC64ROTL, ssa.OpPPC64ROTLW,
ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU, ssa.OpPPC64MULHD, ssa.OpPPC64MULHW, ssa.OpPPC64MULHDU, ssa.OpPPC64MULHWU,
ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN, ssa.OpPPC64FMUL, ssa.OpPPC64FMULS, ssa.OpPPC64FDIV, ssa.OpPPC64FDIVS, ssa.OpPPC64FCPSGN,
ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV: ssa.OpPPC64AND, ssa.OpPPC64OR, ssa.OpPPC64ANDN, ssa.OpPPC64ORN, ssa.OpPPC64NOR, ssa.OpPPC64XOR, ssa.OpPPC64EQV,
ssa.OpPPC64MODUD, ssa.OpPPC64MODSD, ssa.OpPPC64MODUW, ssa.OpPPC64MODSW:
r := v.Reg() r := v.Reg()
r1 := v.Args[0].Reg() r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg() r2 := v.Args[1].Reg()
@ -654,15 +655,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpPPC64ANDCCconst: case ssa.OpPPC64ANDCCconst:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.Reg = v.Args[0].Reg() p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
if v.Aux != nil { p.From.Offset = v.AuxInt
p.From.Type = obj.TYPE_CONST
p.From.Offset = gc.AuxOffset(v)
} else {
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
}
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP // discard result p.To.Reg = ppc64.REGTMP // discard result
@ -850,39 +844,226 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt & 3 p.From.Offset = v.AuxInt & 3
case ssa.OpPPC64LoweredZero: case ssa.OpPPC64LoweredQuadZero, ssa.OpPPC64LoweredQuadZeroShort:
// The LoweredQuad code generation
// generates STXV instructions on
// power9. The Short variation is used
// if no loop is generated.
// unaligned data doesn't hurt performance // sizes >= 64 generate a loop as follows:
// for these instructions on power8 or later
// for sizes >= 64 generate a loop as follows: // Set up loop counter in CTR, used by BC
// XXLXOR clears VS32
// XXLXOR VS32,VS32,VS32
// MOVD len/64,REG_TMP
// MOVD REG_TMP,CTR
// loop:
// STXV VS32,0(R20)
// STXV VS32,16(R20)
// STXV VS32,32(R20)
// STXV VS32,48(R20)
// ADD $64,R20
// BC 16, 0, loop
// set up loop counter in CTR, used by BC // Bytes per iteration
ctr := v.AuxInt / 64
// Remainder bytes
rem := v.AuxInt % 64
// Only generate a loop if there is more
// than 1 iteration.
if ctr > 1 {
// Set up VS32 (V0) to hold 0s
p := s.Prog(ppc64.AXXLXOR)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p.Reg = ppc64.REG_VS32
// Set up CTR loop counter
p = s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ctr
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
p = s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_CTR
// Don't generate padding for
// loops with few iterations.
if ctr > 3 {
p = s.Prog(obj.APCALIGN)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
}
// generate 4 STXVs to zero 64 bytes
var top *obj.Prog
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
// Save the top of loop
if top == nil {
top = p
}
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = 16
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = 32
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = 48
// Increment address for the
// 64 bytes just zeroed.
p = s.Prog(ppc64.AADD)
p.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_CONST
p.From.Offset = 64
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Args[0].Reg()
// Branch back to top of loop
// based on CTR
// BC with BO_BCTR generates bdnz
p = s.Prog(ppc64.ABC)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ppc64.BO_BCTR
p.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_BRANCH
gc.Patch(p, top)
}
// When ctr == 1 the loop was not generated but
// there are at least 64 bytes to clear, so add
// that to the remainder to generate the code
// to clear those doublewords
if ctr == 1 {
rem += 64
}
// Clear the remainder starting at offset zero
offset := int64(0)
if rem >= 16 && ctr <= 1 {
// If the XXLXOR hasn't already been
// generated, do it here to initialize
// VS32 (V0) to 0.
p := s.Prog(ppc64.AXXLXOR)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p.Reg = ppc64.REG_VS32
}
// Generate STXV for 32 or 64
// bytes.
for rem >= 32 {
p := s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = offset
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = offset + 16
offset += 32
rem -= 32
}
// Generate 16 bytes
if rem >= 16 {
p := s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = offset
offset += 16
rem -= 16
}
// first clear as many doublewords as possible
// then clear remaining sizes as available
for rem > 0 {
op, size := ppc64.AMOVB, int64(1)
switch {
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
op, size = ppc64.AMOVW, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
p := s.Prog(op)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.To.Offset = offset
rem -= size
offset += size
}
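For context, the kind of source that reaches this lowering on power9 (a sketch; sizes assume GOARCH=ppc64le with a power9 target):

// Zeroing 192 pointer-free bytes gives ctr = 3 (192/64), so the STXV loop
// above is generated; at 64 bytes ctr == 1, no loop is emitted, and the
// whole object is cleared by the unrolled remainder code.
func clear(p *[192]byte) {
	*p = [192]byte{}
}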
case ssa.OpPPC64LoweredZero, ssa.OpPPC64LoweredZeroShort:
// Unaligned data doesn't hurt performance
// for these instructions on power8.
// For sizes >= 64 generate a loop as follows:
// Set up loop counter in CTR, used by BC
// XXLXOR VS32,VS32,VS32 // XXLXOR VS32,VS32,VS32
// MOVD len/32,REG_TMP // MOVD len/32,REG_TMP
// MOVD REG_TMP,CTR // MOVD REG_TMP,CTR
// MOVD $16,REG_TMP // MOVD $16,REG_TMP
// loop: // loop:
// STXVD2X VS32,(R0)(R3) // STXVD2X VS32,(R0)(R20)
// STXVD2X VS32,(R31)(R3) // STXVD2X VS32,(R31)(R20)
// ADD $32,R3 // ADD $32,R20
// BC 16, 0, loop // BC 16, 0, loop
// //
// any remainder is done as described below // any remainder is done as described below
// for sizes < 64 bytes, first clear as many doublewords as possible, // for sizes < 64 bytes, first clear as many doublewords as possible,
// then handle the remainder // then handle the remainder
// MOVD R0,(R3) // MOVD R0,(R20)
// MOVD R0,8(R3) // MOVD R0,8(R20)
// .... etc. // .... etc.
// //
// the remainder bytes are cleared using one or more // the remainder bytes are cleared using one or more
// of the following instructions with the appropriate // of the following instructions with the appropriate
// offsets depending which instructions are needed // offsets depending which instructions are needed
// //
// MOVW R0,n1(R3) 4 bytes // MOVW R0,n1(R20) 4 bytes
// MOVH R0,n2(R3) 2 bytes // MOVH R0,n2(R20) 2 bytes
// MOVB R0,n3(R3) 1 byte // MOVB R0,n3(R20) 1 byte
// //
// 7 bytes: MOVW, MOVH, MOVB // 7 bytes: MOVW, MOVH, MOVB
// 6 bytes: MOVW, MOVH // 6 bytes: MOVW, MOVH
@ -926,10 +1107,19 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP p.To.Reg = ppc64.REGTMP
// Don't add padding for alignment
// with few loop iterations.
if ctr > 3 {
p = s.Prog(obj.APCALIGN)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
}
// generate 2 STXVD2Xs to store 16 bytes // generate 2 STXVD2Xs to store 16 bytes
// when this is a loop then the top must be saved // when this is a loop then the top must be saved
var top *obj.Prog var top *obj.Prog
// This is the top of loop // This is the top of loop
p = s.Prog(ppc64.ASTXVD2X) p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32 p.From.Reg = ppc64.REG_VS32
@ -940,7 +1130,6 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
if top == nil { if top == nil {
top = p top = p
} }
p = s.Prog(ppc64.ASTXVD2X) p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32 p.From.Reg = ppc64.REG_VS32
@ -1001,8 +1190,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
offset += size offset += size
} }
case ssa.OpPPC64LoweredMove: case ssa.OpPPC64LoweredMove, ssa.OpPPC64LoweredMoveShort:
bytesPerLoop := int64(32)
// This will be used when moving more // This will be used when moving more
// than 8 bytes. Moves start with // than 8 bytes. Moves start with
// as many 8 byte moves as possible, then // as many 8 byte moves as possible, then
@ -1019,34 +1209,34 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
// MOVD REG_TMP,CTR // MOVD REG_TMP,CTR
// MOVD $16,REG_TMP // MOVD $16,REG_TMP
// top: // top:
// LXVD2X (R0)(R4),VS32 // LXVD2X (R0)(R21),VS32
// LXVD2X (R31)(R4),VS33 // LXVD2X (R31)(R21),VS33
// ADD $32,R4 // ADD $32,R21
// STXVD2X VS32,(R0)(R3) // STXVD2X VS32,(R0)(R20)
// STXVD2X VS33,(R31)(R4) // STXVD2X VS33,(R31)(R20)
// ADD $32,R3 // ADD $32,R20
// BC 16,0,top // BC 16,0,top
// Bytes not moved by this loop are moved // Bytes not moved by this loop are moved
// with a combination of the following instructions, // with a combination of the following instructions,
// starting with the largest sizes and generating as // starting with the largest sizes and generating as
// many as needed, using the appropriate offset value. // many as needed, using the appropriate offset value.
// MOVD n(R4),R14 // MOVD n(R21),R31
// MOVD R14,n(R3) // MOVD R31,n(R20)
// MOVW n1(R4),R14 // MOVW n1(R21),R31
// MOVW R14,n1(R3) // MOVW R31,n1(R20)
// MOVH n2(R4),R14 // MOVH n2(R21),R31
// MOVH R14,n2(R3) // MOVH R31,n2(R20)
// MOVB n3(R4),R14 // MOVB n3(R21),R31
// MOVB R14,n3(R3) // MOVB R31,n3(R20)
// Each loop iteration moves 32 bytes // Each loop iteration moves 32 bytes
ctr := v.AuxInt / 32 ctr := v.AuxInt / bytesPerLoop
// Remainder after the loop // Remainder after the loop
rem := v.AuxInt % 32 rem := v.AuxInt % bytesPerLoop
dst_reg := v.Args[0].Reg() dstReg := v.Args[0].Reg()
src_reg := v.Args[1].Reg() srcReg := v.Args[1].Reg()
// The set of registers used here, must match the clobbered reg list // The set of registers used here, must match the clobbered reg list
// in PPC64Ops.go. // in PPC64Ops.go.
@ -1076,57 +1266,65 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP p.To.Reg = ppc64.REGTMP
// Don't add padding for
// alignment with small iteration
// counts.
if ctr > 3 {
p = s.Prog(obj.APCALIGN)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
}
// Generate 16 byte loads and stores. // Generate 16 byte loads and stores.
// Use temp register for index (16) // Use temp register for index (16)
// on the second one. // on the second one.
p = s.Prog(ppc64.ALXVD2X) p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = src_reg p.From.Reg = srcReg
p.From.Index = ppc64.REGZERO p.From.Index = ppc64.REGZERO
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32 p.To.Reg = ppc64.REG_VS32
if top == nil { if top == nil {
top = p top = p
} }
p = s.Prog(ppc64.ALXVD2X) p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
p.From.Reg = src_reg p.From.Reg = srcReg
p.From.Index = ppc64.REGTMP p.From.Index = ppc64.REGTMP
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS33 p.To.Reg = ppc64.REG_VS33
// increment the src reg for next iteration // increment the src reg for next iteration
p = s.Prog(ppc64.AADD) p = s.Prog(ppc64.AADD)
p.Reg = src_reg p.Reg = srcReg
p.From.Type = obj.TYPE_CONST p.From.Type = obj.TYPE_CONST
p.From.Offset = 32 p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = src_reg p.To.Reg = srcReg
// generate 16 byte stores // generate 16 byte stores
p = s.Prog(ppc64.ASTXVD2X) p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32 p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = dst_reg p.To.Reg = dstReg
p.To.Index = ppc64.REGZERO p.To.Index = ppc64.REGZERO
p = s.Prog(ppc64.ASTXVD2X)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS33
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Index = ppc64.REGTMP
// increment the dst reg for next iteration
p = s.Prog(ppc64.AADD)
p.Reg = dstReg
p.From.Type = obj.TYPE_CONST
p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG
p.To.Reg = dstReg
// BC with BO_BCTR generates bdnz to branch on nonzero CTR
// to loop top.
@@ -1137,7 +1335,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_BRANCH
gc.Patch(p, top)
// srcReg and dstReg were incremented in the loop, so
// later instructions start with offset 0.
offset = int64(0)
}
@@ -1145,7 +1343,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
// No loop was generated for one iteration, so
// add 32 bytes to the remainder to move those bytes.
if ctr == 1 {
rem += bytesPerLoop
}
if rem >= 16 {
@@ -1154,7 +1352,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
// on the second one.
p := s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Index = ppc64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
@@ -1163,7 +1361,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Index = ppc64.REGZERO
offset = 16
@@ -1171,18 +1369,15 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
if rem >= 16 {
// Use REGTMP as index reg
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
p = s.Prog(ppc64.ALXVD2X)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Index = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
@@ -1191,7 +1386,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Index = ppc64.REGTMP
offset = 32
@@ -1214,17 +1409,284 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
// Load
p := s.Prog(op)
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset
// Store
p = s.Prog(op)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset
rem -= size
offset += size
}
case ssa.OpPPC64LoweredQuadMove, ssa.OpPPC64LoweredQuadMoveShort:
bytesPerLoop := int64(64)
// This is used when moving more
// than 8 bytes on power9. Moves start with
// as many 8 byte moves as possible, then
// 4, 2, or 1 byte(s) as remaining. This will
// work and be efficient for power8 or later.
// If there are 128 or more bytes, then a
// loop is generated to move 64 bytes and
// update the src and dst addresses on each
// iteration. When < 128 bytes, the appropriate
// number of moves are generated based on the
// size.
// When moving >= 128 bytes a loop is used
// MOVD len/64,REG_TMP
// MOVD REG_TMP,CTR
// top:
// LXV 0(R21),VS32
// LXV 16(R21),VS33
// STXV VS32,0(R20)
// STXV VS33,16(R20)
// LXV 32(R21),VS32
// LXV 48(R21),VS33
// STXV VS32,32(R20)
// STXV VS33,48(R20)
// ADD $64,R21
// ADD $64,R20
// BC 16,0,top
// Bytes not moved by this loop are moved
// with a combination of the following instructions,
// starting with the largest sizes and generating as
// many as needed, using the appropriate offset value.
// Any remaining 32 or 16 byte chunks are moved first
// with LXV/STXV pairs, then:
// MOVD n(R21),R31
// MOVD R31,n(R20)
// MOVW n1(R21),R31
// MOVW R31,n1(R20)
// MOVH n2(R21),R31
// MOVH R31,n2(R20)
// MOVB n3(R21),R31
// MOVB R31,n3(R20)
// Each loop iteration moves 64 bytes
ctr := v.AuxInt / bytesPerLoop
// Remainder after the loop
rem := v.AuxInt % bytesPerLoop
dstReg := v.Args[0].Reg()
srcReg := v.Args[1].Reg()
offset := int64(0)
// top of the loop
var top *obj.Prog
// Only generate looping code when the loop counter is > 1 (i.e., at least 2*bytesPerLoop bytes)
if ctr > 1 {
// Set up the CTR
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ctr
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
p = s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_CTR
p = s.Prog(obj.APCALIGN)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 16
// Generate 16 byte loads and stores.
p = s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
if top == nil {
top = p
}
p = s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset + 16
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS33
// generate 16 byte stores
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS33
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset + 16
// Generate 16 byte loads and stores.
p = s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset + 32
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p = s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset + 48
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS33
// generate 16 byte stores
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset + 32
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS33
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset + 48
// increment the src reg for next iteration
p = s.Prog(ppc64.AADD)
p.Reg = srcReg
p.From.Type = obj.TYPE_CONST
p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG
p.To.Reg = srcReg
// increment the dst reg for next iteration
p = s.Prog(ppc64.AADD)
p.Reg = dstReg
p.From.Type = obj.TYPE_CONST
p.From.Offset = bytesPerLoop
p.To.Type = obj.TYPE_REG
p.To.Reg = dstReg
// BC with BO_BCTR generates bdnz to branch on nonzero CTR
// to loop top.
p = s.Prog(ppc64.ABC)
p.From.Type = obj.TYPE_CONST
p.From.Offset = ppc64.BO_BCTR
p.Reg = ppc64.REG_R0
p.To.Type = obj.TYPE_BRANCH
gc.Patch(p, top)
// srcReg and dstReg were incremented in the loop, so
// later instructions start with offset 0.
offset = int64(0)
}
// No loop was generated for one iteration, so
// add 64 bytes to the remainder to move those bytes.
if ctr == 1 {
rem += bytesPerLoop
}
if rem >= 32 {
p := s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p = s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = 16
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS33
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS33
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = 16
offset = 32
rem -= 32
}
if rem >= 16 {
// Generate 16 byte loads and stores.
p := s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset
offset += 16
rem -= 16
if rem >= 16 {
p := s.Prog(ppc64.ALXV)
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REG_VS32
p = s.Prog(ppc64.ASTXV)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REG_VS32
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset
offset += 16
rem -= 16
}
}
// Generate all the remaining load and store pairs, starting with
// as many 8 byte moves as possible, then 4, 2, 1.
for rem > 0 {
op, size := ppc64.AMOVB, int64(1)
switch {
case rem >= 8:
op, size = ppc64.AMOVD, 8
case rem >= 4:
op, size = ppc64.AMOVW, 4
case rem >= 2:
op, size = ppc64.AMOVH, 2
}
// Load
p := s.Prog(op)
p.To.Type = obj.TYPE_REG
p.To.Reg = ppc64.REGTMP
p.From.Type = obj.TYPE_MEM
p.From.Reg = srcReg
p.From.Offset = offset
// Store
p = s.Prog(op)
p.From.Type = obj.TYPE_REG
p.From.Reg = ppc64.REGTMP
p.To.Type = obj.TYPE_MEM
p.To.Reg = dstReg
p.To.Offset = offset
rem -= size
offset += size
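The decomposition described in the comments above (a counted loop for the bulk of the bytes, then a descending ladder of remainder moves) can be checked with ordinary Go. A minimal stand-alone sketch, where planMove is a hypothetical helper invented for illustration, not compiler code:

package main

import "fmt"

// planMove mirrors the lowering strategy above: a counted loop moves
// bytesPerLoop bytes per iteration (only when more than one iteration
// is needed), and the remainder is covered by progressively smaller
// load/store pairs.
func planMove(n, bytesPerLoop int64) (loopIters int64, tail []int64) {
    loopIters = n / bytesPerLoop
    rem := n % bytesPerLoop
    if loopIters == 1 {
        // A single iteration is emitted inline rather than as a loop.
        loopIters = 0
        rem += bytesPerLoop
    }
    for _, size := range []int64{16, 8, 4, 2, 1} {
        for rem >= size {
            tail = append(tail, size)
            rem -= size
        }
    }
    return loopIters, tail
}

func main() {
    iters, tail := planMove(100, 32)
    fmt.Println(iters, tail) // 3 loop iterations (96 bytes), then a 4-byte move: 3 [4]
}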


@@ -577,6 +577,21 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
}
}
var blockBranch = [...]obj.As{
ssa.BlockRISCV64BEQ: riscv.ABEQ,
ssa.BlockRISCV64BEQZ: riscv.ABEQZ,
ssa.BlockRISCV64BGE: riscv.ABGE,
ssa.BlockRISCV64BGEU: riscv.ABGEU,
ssa.BlockRISCV64BGEZ: riscv.ABGEZ,
ssa.BlockRISCV64BGTZ: riscv.ABGTZ,
ssa.BlockRISCV64BLEZ: riscv.ABLEZ,
ssa.BlockRISCV64BLT: riscv.ABLT,
ssa.BlockRISCV64BLTU: riscv.ABLTU,
ssa.BlockRISCV64BLTZ: riscv.ABLTZ,
ssa.BlockRISCV64BNE: riscv.ABNE,
ssa.BlockRISCV64BNEZ: riscv.ABNEZ,
}
func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
s.SetPos(b.Pos)
@@ -610,27 +625,44 @@ func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = b.Aux.(*obj.LSym)
case ssa.BlockRISCV64BEQ, ssa.BlockRISCV64BEQZ, ssa.BlockRISCV64BNE, ssa.BlockRISCV64BNEZ,
ssa.BlockRISCV64BLT, ssa.BlockRISCV64BLEZ, ssa.BlockRISCV64BGE, ssa.BlockRISCV64BGEZ,
ssa.BlockRISCV64BLTZ, ssa.BlockRISCV64BGTZ, ssa.BlockRISCV64BLTU, ssa.BlockRISCV64BGEU:
as := blockBranch[b.Kind]
invAs := riscv.InvertBranch(as)
var p *obj.Prog
switch next {
case b.Succs[0].Block():
p = s.Br(invAs, b.Succs[1].Block())
case b.Succs[1].Block():
p = s.Br(as, b.Succs[0].Block())
default:
if b.Likely != ssa.BranchUnlikely {
p = s.Br(as, b.Succs[0].Block())
s.Br(obj.AJMP, b.Succs[1].Block())
} else {
p = s.Br(invAs, b.Succs[1].Block())
s.Br(obj.AJMP, b.Succs[0].Block())
}
}
p.From.Type = obj.TYPE_REG
switch b.Kind {
case ssa.BlockRISCV64BEQ, ssa.BlockRISCV64BNE, ssa.BlockRISCV64BLT, ssa.BlockRISCV64BGE, ssa.BlockRISCV64BLTU, ssa.BlockRISCV64BGEU:
if b.NumControls() != 2 {
b.Fatalf("Unexpected number of controls (%d != 2): %s", b.NumControls(), b.LongString())
}
p.From.Reg = b.Controls[0].Reg()
p.Reg = b.Controls[1].Reg()
case ssa.BlockRISCV64BEQZ, ssa.BlockRISCV64BNEZ, ssa.BlockRISCV64BGEZ, ssa.BlockRISCV64BLEZ, ssa.BlockRISCV64BLTZ, ssa.BlockRISCV64BGTZ:
if b.NumControls() != 1 {
b.Fatalf("Unexpected number of controls (%d != 1): %s", b.NumControls(), b.LongString())
}
p.From.Reg = b.Controls[0].Reg()
}
default:
b.Fatalf("Unhandled block: %s", b.LongString())


@@ -234,13 +234,19 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
// 2-address opcode arithmetic
case ssa.OpS390XMULLD, ssa.OpS390XMULLW,
ssa.OpS390XMULHD, ssa.OpS390XMULHDU,
ssa.OpS390XFMULS, ssa.OpS390XFMUL, ssa.OpS390XFDIVS, ssa.OpS390XFDIV:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
case ssa.OpS390XFSUBS, ssa.OpS390XFSUB,
ssa.OpS390XFADDS, ssa.OpS390XFADD:
r := v.Reg0()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
case ssa.OpS390XMLGR:
// MLGR Rx R3 -> R2:R3
r0 := v.Args[0].Reg()


@@ -87,6 +87,13 @@ func addressingModes(f *Func) {
v.resetArgs()
v.Op = c
v.AddArgs(tmp...)
if needSplit[c] {
// It turns out that some of the combined instructions have faster two-instruction equivalents,
// but not the two instructions that led to them being combined here. For example
// (CMPBconstload c (ADDQ x y)) -> (CMPBconstloadidx1 c x y) -> (CMPB c (MOVBloadidx1 x y))
// The final pair of instructions turns out to be notably faster, at least in some benchmarks.
f.Config.splitLoad(v)
}
}
}
}
@@ -101,6 +108,26 @@ func init() {
}
}
// needSplit contains instructions that should be postprocessed by splitLoad
// into a more-efficient two-instruction form.
var needSplit = map[Op]bool{
OpAMD64CMPBloadidx1: true,
OpAMD64CMPWloadidx1: true,
OpAMD64CMPLloadidx1: true,
OpAMD64CMPQloadidx1: true,
OpAMD64CMPWloadidx2: true,
OpAMD64CMPLloadidx4: true,
OpAMD64CMPQloadidx8: true,
OpAMD64CMPBconstloadidx1: true,
OpAMD64CMPWconstloadidx1: true,
OpAMD64CMPLconstloadidx1: true,
OpAMD64CMPQconstloadidx1: true,
OpAMD64CMPWconstloadidx2: true,
OpAMD64CMPLconstloadidx4: true,
OpAMD64CMPQconstloadidx8: true,
}
// For each entry k, v in this map, if we have a value x with:
// x.Op == k[0]
// x.Args[0].Op == k[1]
@@ -162,6 +189,8 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
// These instructions are re-split differently for performance, see needSplit above.
// TODO if 386 versions are created, also update needSplit and gen/386splitload.rules
[2]Op{OpAMD64CMPBload, OpAMD64ADDQ}: OpAMD64CMPBloadidx1,
[2]Op{OpAMD64CMPWload, OpAMD64ADDQ}: OpAMD64CMPWloadidx1,
[2]Op{OpAMD64CMPLload, OpAMD64ADDQ}: OpAMD64CMPLloadidx1,
@@ -188,6 +217,110 @@ var combine = map[[2]Op]Op{
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ1}: OpAMD64CMPQconstloadidx1,
[2]Op{OpAMD64CMPQconstload, OpAMD64LEAQ8}: OpAMD64CMPQconstloadidx8,
[2]Op{OpAMD64ADDLload, OpAMD64ADDQ}: OpAMD64ADDLloadidx1,
[2]Op{OpAMD64ADDQload, OpAMD64ADDQ}: OpAMD64ADDQloadidx1,
[2]Op{OpAMD64SUBLload, OpAMD64ADDQ}: OpAMD64SUBLloadidx1,
[2]Op{OpAMD64SUBQload, OpAMD64ADDQ}: OpAMD64SUBQloadidx1,
[2]Op{OpAMD64ANDLload, OpAMD64ADDQ}: OpAMD64ANDLloadidx1,
[2]Op{OpAMD64ANDQload, OpAMD64ADDQ}: OpAMD64ANDQloadidx1,
[2]Op{OpAMD64ORLload, OpAMD64ADDQ}: OpAMD64ORLloadidx1,
[2]Op{OpAMD64ORQload, OpAMD64ADDQ}: OpAMD64ORQloadidx1,
[2]Op{OpAMD64XORLload, OpAMD64ADDQ}: OpAMD64XORLloadidx1,
[2]Op{OpAMD64XORQload, OpAMD64ADDQ}: OpAMD64XORQloadidx1,
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ1}: OpAMD64ADDLloadidx1,
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ4}: OpAMD64ADDLloadidx4,
[2]Op{OpAMD64ADDLload, OpAMD64LEAQ8}: OpAMD64ADDLloadidx8,
[2]Op{OpAMD64ADDQload, OpAMD64LEAQ1}: OpAMD64ADDQloadidx1,
[2]Op{OpAMD64ADDQload, OpAMD64LEAQ8}: OpAMD64ADDQloadidx8,
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ1}: OpAMD64SUBLloadidx1,
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ4}: OpAMD64SUBLloadidx4,
[2]Op{OpAMD64SUBLload, OpAMD64LEAQ8}: OpAMD64SUBLloadidx8,
[2]Op{OpAMD64SUBQload, OpAMD64LEAQ1}: OpAMD64SUBQloadidx1,
[2]Op{OpAMD64SUBQload, OpAMD64LEAQ8}: OpAMD64SUBQloadidx8,
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ1}: OpAMD64ANDLloadidx1,
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ4}: OpAMD64ANDLloadidx4,
[2]Op{OpAMD64ANDLload, OpAMD64LEAQ8}: OpAMD64ANDLloadidx8,
[2]Op{OpAMD64ANDQload, OpAMD64LEAQ1}: OpAMD64ANDQloadidx1,
[2]Op{OpAMD64ANDQload, OpAMD64LEAQ8}: OpAMD64ANDQloadidx8,
[2]Op{OpAMD64ORLload, OpAMD64LEAQ1}: OpAMD64ORLloadidx1,
[2]Op{OpAMD64ORLload, OpAMD64LEAQ4}: OpAMD64ORLloadidx4,
[2]Op{OpAMD64ORLload, OpAMD64LEAQ8}: OpAMD64ORLloadidx8,
[2]Op{OpAMD64ORQload, OpAMD64LEAQ1}: OpAMD64ORQloadidx1,
[2]Op{OpAMD64ORQload, OpAMD64LEAQ8}: OpAMD64ORQloadidx8,
[2]Op{OpAMD64XORLload, OpAMD64LEAQ1}: OpAMD64XORLloadidx1,
[2]Op{OpAMD64XORLload, OpAMD64LEAQ4}: OpAMD64XORLloadidx4,
[2]Op{OpAMD64XORLload, OpAMD64LEAQ8}: OpAMD64XORLloadidx8,
[2]Op{OpAMD64XORQload, OpAMD64LEAQ1}: OpAMD64XORQloadidx1,
[2]Op{OpAMD64XORQload, OpAMD64LEAQ8}: OpAMD64XORQloadidx8,
[2]Op{OpAMD64ADDLmodify, OpAMD64ADDQ}: OpAMD64ADDLmodifyidx1,
[2]Op{OpAMD64ADDQmodify, OpAMD64ADDQ}: OpAMD64ADDQmodifyidx1,
[2]Op{OpAMD64SUBLmodify, OpAMD64ADDQ}: OpAMD64SUBLmodifyidx1,
[2]Op{OpAMD64SUBQmodify, OpAMD64ADDQ}: OpAMD64SUBQmodifyidx1,
[2]Op{OpAMD64ANDLmodify, OpAMD64ADDQ}: OpAMD64ANDLmodifyidx1,
[2]Op{OpAMD64ANDQmodify, OpAMD64ADDQ}: OpAMD64ANDQmodifyidx1,
[2]Op{OpAMD64ORLmodify, OpAMD64ADDQ}: OpAMD64ORLmodifyidx1,
[2]Op{OpAMD64ORQmodify, OpAMD64ADDQ}: OpAMD64ORQmodifyidx1,
[2]Op{OpAMD64XORLmodify, OpAMD64ADDQ}: OpAMD64XORLmodifyidx1,
[2]Op{OpAMD64XORQmodify, OpAMD64ADDQ}: OpAMD64XORQmodifyidx1,
[2]Op{OpAMD64ADDLmodify, OpAMD64LEAQ1}: OpAMD64ADDLmodifyidx1,
[2]Op{OpAMD64ADDLmodify, OpAMD64LEAQ4}: OpAMD64ADDLmodifyidx4,
[2]Op{OpAMD64ADDLmodify, OpAMD64LEAQ8}: OpAMD64ADDLmodifyidx8,
[2]Op{OpAMD64ADDQmodify, OpAMD64LEAQ1}: OpAMD64ADDQmodifyidx1,
[2]Op{OpAMD64ADDQmodify, OpAMD64LEAQ8}: OpAMD64ADDQmodifyidx8,
[2]Op{OpAMD64SUBLmodify, OpAMD64LEAQ1}: OpAMD64SUBLmodifyidx1,
[2]Op{OpAMD64SUBLmodify, OpAMD64LEAQ4}: OpAMD64SUBLmodifyidx4,
[2]Op{OpAMD64SUBLmodify, OpAMD64LEAQ8}: OpAMD64SUBLmodifyidx8,
[2]Op{OpAMD64SUBQmodify, OpAMD64LEAQ1}: OpAMD64SUBQmodifyidx1,
[2]Op{OpAMD64SUBQmodify, OpAMD64LEAQ8}: OpAMD64SUBQmodifyidx8,
[2]Op{OpAMD64ANDLmodify, OpAMD64LEAQ1}: OpAMD64ANDLmodifyidx1,
[2]Op{OpAMD64ANDLmodify, OpAMD64LEAQ4}: OpAMD64ANDLmodifyidx4,
[2]Op{OpAMD64ANDLmodify, OpAMD64LEAQ8}: OpAMD64ANDLmodifyidx8,
[2]Op{OpAMD64ANDQmodify, OpAMD64LEAQ1}: OpAMD64ANDQmodifyidx1,
[2]Op{OpAMD64ANDQmodify, OpAMD64LEAQ8}: OpAMD64ANDQmodifyidx8,
[2]Op{OpAMD64ORLmodify, OpAMD64LEAQ1}: OpAMD64ORLmodifyidx1,
[2]Op{OpAMD64ORLmodify, OpAMD64LEAQ4}: OpAMD64ORLmodifyidx4,
[2]Op{OpAMD64ORLmodify, OpAMD64LEAQ8}: OpAMD64ORLmodifyidx8,
[2]Op{OpAMD64ORQmodify, OpAMD64LEAQ1}: OpAMD64ORQmodifyidx1,
[2]Op{OpAMD64ORQmodify, OpAMD64LEAQ8}: OpAMD64ORQmodifyidx8,
[2]Op{OpAMD64XORLmodify, OpAMD64LEAQ1}: OpAMD64XORLmodifyidx1,
[2]Op{OpAMD64XORLmodify, OpAMD64LEAQ4}: OpAMD64XORLmodifyidx4,
[2]Op{OpAMD64XORLmodify, OpAMD64LEAQ8}: OpAMD64XORLmodifyidx8,
[2]Op{OpAMD64XORQmodify, OpAMD64LEAQ1}: OpAMD64XORQmodifyidx1,
[2]Op{OpAMD64XORQmodify, OpAMD64LEAQ8}: OpAMD64XORQmodifyidx8,
[2]Op{OpAMD64ADDLconstmodify, OpAMD64ADDQ}: OpAMD64ADDLconstmodifyidx1,
[2]Op{OpAMD64ADDQconstmodify, OpAMD64ADDQ}: OpAMD64ADDQconstmodifyidx1,
[2]Op{OpAMD64ANDLconstmodify, OpAMD64ADDQ}: OpAMD64ANDLconstmodifyidx1,
[2]Op{OpAMD64ANDQconstmodify, OpAMD64ADDQ}: OpAMD64ANDQconstmodifyidx1,
[2]Op{OpAMD64ORLconstmodify, OpAMD64ADDQ}: OpAMD64ORLconstmodifyidx1,
[2]Op{OpAMD64ORQconstmodify, OpAMD64ADDQ}: OpAMD64ORQconstmodifyidx1,
[2]Op{OpAMD64XORLconstmodify, OpAMD64ADDQ}: OpAMD64XORLconstmodifyidx1,
[2]Op{OpAMD64XORQconstmodify, OpAMD64ADDQ}: OpAMD64XORQconstmodifyidx1,
[2]Op{OpAMD64ADDLconstmodify, OpAMD64LEAQ1}: OpAMD64ADDLconstmodifyidx1,
[2]Op{OpAMD64ADDLconstmodify, OpAMD64LEAQ4}: OpAMD64ADDLconstmodifyidx4,
[2]Op{OpAMD64ADDLconstmodify, OpAMD64LEAQ8}: OpAMD64ADDLconstmodifyidx8,
[2]Op{OpAMD64ADDQconstmodify, OpAMD64LEAQ1}: OpAMD64ADDQconstmodifyidx1,
[2]Op{OpAMD64ADDQconstmodify, OpAMD64LEAQ8}: OpAMD64ADDQconstmodifyidx8,
[2]Op{OpAMD64ANDLconstmodify, OpAMD64LEAQ1}: OpAMD64ANDLconstmodifyidx1,
[2]Op{OpAMD64ANDLconstmodify, OpAMD64LEAQ4}: OpAMD64ANDLconstmodifyidx4,
[2]Op{OpAMD64ANDLconstmodify, OpAMD64LEAQ8}: OpAMD64ANDLconstmodifyidx8,
[2]Op{OpAMD64ANDQconstmodify, OpAMD64LEAQ1}: OpAMD64ANDQconstmodifyidx1,
[2]Op{OpAMD64ANDQconstmodify, OpAMD64LEAQ8}: OpAMD64ANDQconstmodifyidx8,
[2]Op{OpAMD64ORLconstmodify, OpAMD64LEAQ1}: OpAMD64ORLconstmodifyidx1,
[2]Op{OpAMD64ORLconstmodify, OpAMD64LEAQ4}: OpAMD64ORLconstmodifyidx4,
[2]Op{OpAMD64ORLconstmodify, OpAMD64LEAQ8}: OpAMD64ORLconstmodifyidx8,
[2]Op{OpAMD64ORQconstmodify, OpAMD64LEAQ1}: OpAMD64ORQconstmodifyidx1,
[2]Op{OpAMD64ORQconstmodify, OpAMD64LEAQ8}: OpAMD64ORQconstmodifyidx8,
[2]Op{OpAMD64XORLconstmodify, OpAMD64LEAQ1}: OpAMD64XORLconstmodifyidx1,
[2]Op{OpAMD64XORLconstmodify, OpAMD64LEAQ4}: OpAMD64XORLconstmodifyidx4,
[2]Op{OpAMD64XORLconstmodify, OpAMD64LEAQ8}: OpAMD64XORLconstmodifyidx8,
[2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ1}: OpAMD64XORQconstmodifyidx1,
[2]Op{OpAMD64XORQconstmodify, OpAMD64LEAQ8}: OpAMD64XORQconstmodifyidx8,
// 386
[2]Op{Op386MOVBload, Op386ADDL}: Op386MOVBloadidx1,
[2]Op{Op386MOVWload, Op386ADDL}: Op386MOVWloadidx1,
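The pass above is table-driven: combine describes which (op, address op) pairs fuse into an indexed op, and needSplit marks fused ops that should immediately be re-split because a two-instruction form is faster. A small self-contained model of that flow, using string ops instead of the compiler's real Op values:

package main

import "fmt"

// Op, combine and needSplit below are illustrative stand-ins, not the
// compiler's actual types or tables.
type Op string

var combine = map[[2]Op]Op{
    {"CMPBconstload", "ADDQ"}: "CMPBconstloadidx1",
}

var needSplit = map[Op]bool{
    "CMPBconstloadidx1": true,
}

// fold fuses v with its first argument's op when the table allows it; the
// second result tells the caller to re-split the fused op afterwards.
func fold(v, arg0 Op) (Op, bool) {
    c, ok := combine[[2]Op{v, arg0}]
    if !ok {
        return v, false
    }
    return c, needSplit[c]
}

func main() {
    op, split := fold("CMPBconstload", "ADDQ")
    fmt.Println(op, split) // CMPBconstloadidx1 true -> re-split to CMPB + MOVBloadidx1
}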


@@ -124,15 +124,8 @@ func (b *Block) LongString() string {
if b.Aux != nil {
s += fmt.Sprintf(" {%s}", b.Aux)
}
if t := b.AuxIntString(); t != "" {
s += fmt.Sprintf(" [%s]", t)
}
for _, c := range b.ControlValues() {
s += fmt.Sprintf(" %s", c)
@@ -263,6 +256,17 @@ func (b *Block) resetWithControl2(kind BlockKind, v, w *Value) {
w.Uses++
}
// truncateValues truncates b.Values at the ith element, zeroing subsequent elements.
// The values in b.Values after i must already have had their args reset,
// to maintain correct value uses counts.
func (b *Block) truncateValues(i int) {
tail := b.Values[i:]
for j := range tail {
tail[j] = nil
}
b.Values = b.Values[:i]
}
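truncateValues relies on a general Go pattern: nil out the tail of a slice before shortening it, so the shared backing array does not keep dead pointers reachable. A stand-alone sketch of the same pattern (truncate here is illustrative, not the method above):

package main

import "fmt"

type Value struct{ ID int }

// truncate zeroes the tail before shortening the slice, so the backing
// array holds no stale *Value pointers for the garbage collector to trace.
func truncate(vals []*Value, i int) []*Value {
    tail := vals[i:]
    for j := range tail {
        tail[j] = nil
    }
    return vals[:i]
}

func main() {
    vals := []*Value{{1}, {2}, {3}}
    vals = truncate(vals, 1)
    fmt.Println(len(vals), cap(vals)) // 1 3: length shrinks, capacity is retained
}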
// AddEdgeTo adds an edge from block b to block c. Used during building of the
// SSA graph; do not use on an already-completed SSA graph.
func (b *Block) AddEdgeTo(c *Block) {
@@ -341,6 +345,19 @@ func (b *Block) LackingPos() bool {
return true
}
func (b *Block) AuxIntString() string {
switch b.Kind.AuxIntType() {
case "int8":
return fmt.Sprintf("%v", int8(b.AuxInt))
case "uint8":
return fmt.Sprintf("%v", uint8(b.AuxInt))
default: // type specified but not implemented - print as int64
return fmt.Sprintf("%v", b.AuxInt)
case "": // no aux int type
return ""
}
}
func (b *Block) Logf(msg string, args ...interface{}) { b.Func.Logf(msg, args...) }
func (b *Block) Log() bool { return b.Func.Log() }
func (b *Block) Fatalf(msg string, args ...interface{}) { b.Func.Fatalf(msg, args...) }


@@ -5,6 +5,7 @@
package ssa
import (
"cmd/internal/obj/s390x"
"math"
"math/bits"
)
@@ -119,6 +120,7 @@ func checkFunc(f *Func) {
// Check to make sure aux values make sense.
canHaveAux := false
canHaveAuxInt := false
// TODO: enforce types of Aux in this switch (like auxString does below)
switch opcodeTable[v.Op].auxType {
case auxNone:
case auxBool:
@@ -158,7 +160,12 @@ func checkFunc(f *Func) {
if math.IsNaN(v.AuxFloat()) {
f.Fatalf("value %v has an AuxInt that encodes a NaN", v)
}
case auxString:
if _, ok := v.Aux.(string); !ok {
f.Fatalf("value %v has Aux type %T, want string", v, v.Aux)
}
canHaveAux = true
case auxSym, auxTyp:
canHaveAux = true
case auxSymOff, auxSymValAndOff, auxTypSize:
canHaveAuxInt = true
@@ -168,6 +175,16 @@ func checkFunc(f *Func) {
f.Fatalf("bad type %T for CCop in %v", v.Aux, v)
}
canHaveAux = true
case auxS390XCCMask:
if _, ok := v.Aux.(s390x.CCMask); !ok {
f.Fatalf("bad type %T for S390XCCMask in %v", v.Aux, v)
}
canHaveAux = true
case auxS390XRotateParams:
if _, ok := v.Aux.(s390x.RotateParams); !ok {
f.Fatalf("bad type %T for S390XRotateParams in %v", v.Aux, v)
}
canHaveAux = true
default:
f.Fatalf("unknown aux type for %s", v.Op)
}


@@ -55,7 +55,7 @@ func Compile(f *Func) {
if f.Log() {
printFunc(f)
}
f.HTMLWriter.WritePhase("start", "start")
if BuildDump != "" && BuildDump == f.Name {
f.dumpFile("build")
}
@@ -111,7 +111,7 @@ func Compile(f *Func) {
f.Logf(" pass %s end %s\n", p.name, stats)
printFunc(f)
}
f.HTMLWriter.WritePhase(phaseName, fmt.Sprintf("%s <span class=\"stats\">%s</span>", phaseName, stats))
}
if p.time || p.mem {
// Surround timing information w/ enough context to allow comparisons.
@@ -136,6 +136,11 @@ func Compile(f *Func) {
}
}
if f.HTMLWriter != nil {
// Ensure we write any pending phases to the html
f.HTMLWriter.flushPhases()
}
if f.ruleMatches != nil {
var keys []string
for key := range f.ruleMatches {


@@ -135,7 +135,7 @@ type Frontend interface {
Logger
// StringData returns a symbol pointing to the given string's contents.
StringData(string) *obj.LSym
// Auto returns a Node for an auto variable of the given type.
// The SSA compiler uses this function to allocate space for spills.


@@ -242,8 +242,9 @@ func deadcode(f *Func) {
f.NamedValues[name] = values[:j]
}
}
clearNames := f.Names[i:]
for j := range clearNames {
clearNames[j] = LocalSlot{}
}
f.Names = f.Names[:i]
@@ -295,12 +296,7 @@ func deadcode(f *Func) {
f.freeValue(v)
}
}
b.truncateValues(i)
}
// Remove dead blocks from WBLoads list.
@@ -311,8 +307,9 @@ func deadcode(f *Func) {
i++
}
}
clearWBLoads := f.WBLoads[i:]
for j := range clearWBLoads {
clearWBLoads[j] = nil
}
f.WBLoads = f.WBLoads[:i]


@@ -95,6 +95,8 @@ var optimizedLibs = (!strings.Contains(gogcflags, "-N") && !strings.Contains(gog
// go test debug_test.go -args -u -d
func TestNexting(t *testing.T) {
testenv.SkipFlaky(t, 37404)
skipReasons := "" // Many possible skip reasons, list all that apply
if testing.Short() {
skipReasons = "not run in short mode; "


@@ -90,7 +90,7 @@ func (d *DummyAuto) IsAutoTmp() bool {
return true
}
func (DummyFrontend) StringData(s string) *obj.LSym {
return nil
}
func (DummyFrontend) Auto(pos src.XPos, t *types.Type) GCNode {


@@ -106,7 +106,7 @@ func flagalloc(f *Func) {
}
// Add flag spill and recomputation where they are needed.
var remove []*Value // values that should be checked for possible removal
var oldSched []*Value
for _, b := range f.Blocks {
oldSched = append(oldSched[:0], b.Values...)
@@ -131,6 +131,7 @@ func flagalloc(f *Func) {
// If v will be spilled, and v uses memory, then we must split it
// into a load + a flag generator.
if spill[v.ID] && v.MemoryArg() != nil {
remove = append(remove, v)
if !f.Config.splitLoad(v) {
f.Fatalf("can't split flag generator: %s", v.LongString())
}
@@ -164,6 +165,7 @@ func flagalloc(f *Func) {
for i, v := range b.ControlValues() {
if v != flag && v.Type.IsFlags() {
// Recalculate control value.
remove = append(remove, v)
c := copyFlags(v, b)
b.ReplaceControl(i, c)
flag = v
@@ -172,12 +174,15 @@ func flagalloc(f *Func) {
if v := end[b.ID]; v != nil && v != flag {
// Need to reissue flag generator for use by
// subsequent blocks.
remove = append(remove, v)
copyFlags(v, b)
// Note: this flag generator is not properly linked up
// with the flag users. This breaks the SSA representation.
// We could fix up the users with another pass, but for now
// we'll just leave it. (Regalloc has the same issue for
// standard regs, and it runs next.)
// For this reason, take care not to add this flag
// generator to the remove list.
}
}
@@ -185,6 +190,58 @@ func flagalloc(f *Func) {
for _, b := range f.Blocks {
b.FlagsLiveAtEnd = end[b.ID] != nil
}
const go115flagallocdeadcode = true
if !go115flagallocdeadcode {
return
}
// Remove any now-dead values.
// The number of values to remove is likely small,
// and removing them requires processing all values in a block,
// so minimize the number of blocks that we touch.
// Shrink remove to contain only dead values, and clobber those dead values.
for i := 0; i < len(remove); i++ {
v := remove[i]
if v.Uses == 0 {
v.reset(OpInvalid)
continue
}
// Remove v.
last := len(remove) - 1
remove[i] = remove[last]
remove[last] = nil
remove = remove[:last]
i-- // reprocess value at i
}
if len(remove) == 0 {
return
}
removeBlocks := f.newSparseSet(f.NumBlocks())
defer f.retSparseSet(removeBlocks)
for _, v := range remove {
removeBlocks.add(v.Block.ID)
}
// Process affected blocks, preserving value order.
for _, b := range f.Blocks {
if !removeBlocks.contains(b.ID) {
continue
}
i := 0
for j := 0; j < len(b.Values); j++ {
v := b.Values[j]
if v.Op == OpInvalid {
continue
}
b.Values[i] = v
i++
}
b.truncateValues(i)
}
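The shrink step above uses a swap-remove filter: live values are swapped out with the last element and the slice shrinks in place, leaving only dead values to clobber. A self-contained sketch of that idiom over plain ints (filterDead and isDead are invented names):

package main

import "fmt"

// filterDead keeps only the entries for which isDead reports true, by
// swapping still-live entries with the last element; order is not
// preserved, which is fine for a worklist.
func filterDead(worklist []int, isDead func(int) bool) []int {
    for i := 0; i < len(worklist); i++ {
        if isDead(worklist[i]) {
            continue
        }
        last := len(worklist) - 1
        worklist[i] = worklist[last]
        worklist = worklist[:last]
        i-- // reprocess the element swapped into position i
    }
    return worklist
}

func main() {
    dead := filterDead([]int{1, 2, 3, 4}, func(v int) bool { return v%2 == 0 })
    fmt.Println(dead) // [4 2]: only the dead (even) entries remain
}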
}
func (v *Value) clobbersFlags() bool {


@@ -20,6 +20,7 @@ const (
fuseTypePlain fuseType = 1 << iota
fuseTypeIf
fuseTypeIntInRange
fuseTypeShortCircuit
)
// fuse simplifies control flow by joining basic blocks.
@@ -38,6 +39,9 @@ func fuse(f *Func, typ fuseType) {
if typ&fuseTypePlain != 0 {
changed = fuseBlockPlain(b) || changed
}
if typ&fuseTypeShortCircuit != 0 {
changed = shortcircuitBlock(b) || changed
}
}
if changed {
f.invalidateCFG()
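fuseType is a plain bit set: each sub-pass owns one bit and callers OR bits together to request a combined pass. A compilable sketch of the same shape, with placeholder pass bodies (the constant names mirror the diff; nothing else here is compiler code):

package main

import "fmt"

type fuseType uint8

const (
    fuseTypePlain fuseType = 1 << iota
    fuseTypeIf
    fuseTypeIntInRange
    fuseTypeShortCircuit
)

// fuse runs only the sub-passes whose bits are set in typ.
func fuse(typ fuseType) {
    if typ&fuseTypePlain != 0 {
        fmt.Println("run plain block fusing")
    }
    if typ&fuseTypeShortCircuit != 0 {
        fmt.Println("run short-circuit rewriting")
    }
}

func main() {
    fuse(fuseTypePlain | fuseTypeShortCircuit)
}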
File diff suppressed because it is too large
@@ -531,13 +531,13 @@ func init() {
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// Extend ops are the same as Bounds ops except the indexes are 64-bit.
{name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, dx, bx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, cx, dx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{si, ax, cx}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the


@@ -4,6 +4,8 @@
// See the top of AMD64splitload.rules for discussion of these rules.
(CMP(L|W|B)load {sym} [off] ptr x mem) => (CMP(L|W|B) (MOV(L|W|B)load {sym} [off] ptr mem) x)
(CMPLconstload {sym} [vo] ptr mem) => (CMPLconst (MOVLload {sym} [vo.Off32()] ptr mem) [vo.Val32()])
(CMPWconstload {sym} [vo] ptr mem) => (CMPWconst (MOVWload {sym} [vo.Off32()] ptr mem) [vo.Val16()])
(CMPBconstload {sym} [vo] ptr mem) => (CMPBconst (MOVBload {sym} [vo.Off32()] ptr mem) [vo.Val8()])
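The [vo] aux value in these rules packs a comparison value and a load offset into a single int64, which Val32/Off32 (and the narrower Val16/Val8) unpack. A stand-alone sketch of that encoding, assuming the value sits in the high 32 bits and the offset in the low 32 bits; valAndOff here is an illustrative copy, the real type is the ssa package's ValAndOff:

package main

import "fmt"

// valAndOff packs a 32-bit value into the high half and a 32-bit offset
// into the low half of one int64.
type valAndOff int64

func makeValAndOff(val, off int32) valAndOff {
    return valAndOff(int64(val)<<32 | int64(uint32(off)))
}

func (x valAndOff) Val32() int32 { return int32(int64(x) >> 32) } // arithmetic shift keeps the sign
func (x valAndOff) Off32() int32 { return int32(int64(x)) }       // low 32 bits

func main() {
    vo := makeValAndOff(-7, 24)
    fmt.Println(vo.Val32(), vo.Off32()) // -7 24
}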


@@ -3,311 +3,302 @@
// license that can be found in the LICENSE file.

// Lowering arithmetic
(Add(64|32|16|8) ...) => (ADD(Q|L|L|L) ...)
(AddPtr ...) => (ADDQ ...)
(Add(32|64)F ...) => (ADDS(S|D) ...)
(Sub(64|32|16|8) ...) => (SUB(Q|L|L|L) ...)
(SubPtr ...) => (SUBQ ...)
(Sub(32|64)F ...) => (SUBS(S|D) ...)
(Mul(64|32|16|8) ...) => (MUL(Q|L|L|L) ...)
(Mul(32|64)F ...) => (MULS(S|D) ...)
(Select0 (Mul64uover x y)) => (Select0 <typ.UInt64> (MULQU x y))
(Select0 (Mul32uover x y)) => (Select0 <typ.UInt32> (MULLU x y))
(Select1 (Mul(64|32)uover x y)) => (SETO (Select1 <types.TypeFlags> (MUL(Q|L)U x y)))
(Hmul(64|32) ...) => (HMUL(Q|L) ...)
(Hmul(64|32)u ...) => (HMUL(Q|L)U ...)
(Div(64|32|16) [a] x y) => (Select0 (DIV(Q|L|W) [a] x y))
(Div8 x y) => (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Div(64|32|16)u x y) => (Select0 (DIV(Q|L|W)U x y))
(Div8u x y) => (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))
(Div(32|64)F ...) => (DIVS(S|D) ...)

(Select0 (Add64carry x y c)) =>
(Select0 <typ.UInt64> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Add64carry x y c)) =>
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (ADCQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))
(Select0 (Sub64borrow x y c)) =>
(Select0 <typ.UInt64> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))
(Select1 (Sub64borrow x y c)) =>
(NEGQ <typ.UInt64> (SBBQcarrymask <typ.UInt64> (Select1 <types.TypeFlags> (SBBQ x y (Select1 <types.TypeFlags> (NEGLflags c))))))

// Optimize ADCQ and friends
(ADCQ x (MOVQconst [c]) carry) && is32Bit(c) => (ADCQconst x [int32(c)] carry)
(ADCQ x y (FlagEQ)) => (ADDQcarry x y)
(ADCQconst x [c] (FlagEQ)) => (ADDQconstcarry x [c])
(ADDQcarry x (MOVQconst [c])) && is32Bit(c) => (ADDQconstcarry x [int32(c)])
(SBBQ x (MOVQconst [c]) borrow) && is32Bit(c) => (SBBQconst x [int32(c)] borrow)
(SBBQ x y (FlagEQ)) => (SUBQborrow x y)
(SBBQconst x [c] (FlagEQ)) => (SUBQconstborrow x [c])
(SUBQborrow x (MOVQconst [c])) && is32Bit(c) => (SUBQconstborrow x [int32(c)])
(Select1 (NEGLflags (MOVQconst [0]))) => (FlagEQ)
(Select1 (NEGLflags (NEGQ (SBBQcarrymask x)))) => x

(Mul64uhilo ...) => (MULQU2 ...)
(Div128u ...) => (DIVQU2 ...)
(Avg64u ...) => (AVGQU ...)

(Mod(64|32|16) [a] x y) => (Select1 (DIV(Q|L|W) [a] x y))
(Mod8 x y) => (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y)))
(Mod(64|32|16)u x y) => (Select1 (DIV(Q|L|W)U x y))
(Mod8u x y) => (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y)))

(And(64|32|16|8) ...) => (AND(Q|L|L|L) ...)
(Or(64|32|16|8) ...) => (OR(Q|L|L|L) ...)
(Xor(64|32|16|8) ...) => (XOR(Q|L|L|L) ...)
(Com(64|32|16|8) ...) => (NOT(Q|L|L|L) ...)

(Neg(64|32|16|8) ...) => (NEG(Q|L|L|L) ...)
(Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
(Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))

// Lowering boolean ops
(AndB ...) => (ANDL ...)
(OrB ...) => (ORL ...)
(Not x) => (XORLconst [1] x)

// Lowering pointer arithmetic
(OffPtr [off] ptr) && is32Bit(off) => (ADDQconst [int32(off)] ptr)
(OffPtr [off] ptr) => (ADDQ (MOVQconst [off]) ptr)

// Lowering other arithmetic
(Ctz64 <t> x) => (CMOVQEQ (Select0 <t> (BSFQ x)) (MOVQconst <t> [64]) (Select1 <types.TypeFlags> (BSFQ x)))
(Ctz32 x) => (Select0 (BSFQ (BTSQconst <typ.UInt64> [32] x)))
(Ctz16 x) => (BSFL (BTSLconst <typ.UInt32> [16] x))
(Ctz8 x) => (BSFL (BTSLconst <typ.UInt32> [ 8] x))
(Ctz64NonZero x) => (Select0 (BSFQ x))
(Ctz32NonZero ...) => (BSFL ...)
(Ctz16NonZero ...) => (BSFL ...)
(Ctz8NonZero ...) => (BSFL ...)

// BitLen64 of a 64 bit value x requires checking whether x == 0, since BSRQ is undefined when x == 0.
// However, for zero-extended values, we can cheat a bit, and calculate
// BSR(x<<1 + 1), which is guaranteed to be non-zero, and which conveniently
// places the index of the highest set bit where we want it.
(BitLen64 <t> x) => (ADDQconst [1] (CMOVQEQ <t> (Select0 <t> (BSRQ x)) (MOVQconst <t> [-1]) (Select1 <types.TypeFlags> (BSRQ x))))
(BitLen32 x) => (Select0 (BSRQ (LEAQ1 <typ.UInt64> [1] (MOVLQZX <typ.UInt64> x) (MOVLQZX <typ.UInt64> x))))
(BitLen16 x) => (BSRL (LEAL1 <typ.UInt32> [1] (MOVWQZX <typ.UInt32> x) (MOVWQZX <typ.UInt32> x)))
(BitLen8 x) => (BSRL (LEAL1 <typ.UInt32> [1] (MOVBQZX <typ.UInt32> x) (MOVBQZX <typ.UInt32> x)))

(Bswap(64|32) ...) => (BSWAP(Q|L) ...)

(PopCount(64|32) ...) => (POPCNT(Q|L) ...)
(PopCount16 x) => (POPCNTL (MOVWQZX <typ.UInt32> x))
(PopCount8 x) => (POPCNTL (MOVBQZX <typ.UInt32> x))

(Sqrt ...) => (SQRTSD ...)

(RoundToEven x) => (ROUNDSD [0] x)
(Floor x) => (ROUNDSD [1] x)
(Ceil x) => (ROUNDSD [2] x)
(Trunc x) => (ROUNDSD [3] x)

(FMA x y z) => (VFMADD231SD z x y)

// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to16 ...) => (MOVBQSX ...)
(SignExt8to32 ...) => (MOVBQSX ...)
(SignExt8to64 ...) => (MOVBQSX ...)
(SignExt16to32 ...) => (MOVWQSX ...)
(SignExt16to64 ...) => (MOVWQSX ...)
(SignExt32to64 ...) => (MOVLQSX ...)

(ZeroExt8to16 ...) => (MOVBQZX ...)
(ZeroExt8to32 ...) => (MOVBQZX ...)
(ZeroExt8to64 ...) => (MOVBQZX ...)
(ZeroExt16to32 ...) => (MOVWQZX ...)
(ZeroExt16to64 ...) => (MOVWQZX ...)
(ZeroExt32to64 ...) => (MOVLQZX ...)

(Slicemask <t> x) => (SARQconst (NEGQ <t> x) [63])

(SpectreIndex <t> x y) => (CMOVQCC x (MOVQconst [0]) (CMPQ x y))
(SpectreSliceIndex <t> x y) => (CMOVQHI x (MOVQconst [0]) (CMPQ x y))

// Lowering truncation
// Because we ignore high parts of registers, truncates are just copies.
(Trunc16to8 ...) => (Copy ...)
(Trunc32to8 ...) => (Copy ...)
(Trunc32to16 ...) => (Copy ...)
(Trunc64to8 ...) => (Copy ...)
(Trunc64to16 ...) => (Copy ...)
(Trunc64to32 ...) => (Copy ...)

// Lowering float <-> int
(Cvt32to32F ...) => (CVTSL2SS ...)
(Cvt32to64F ...) => (CVTSL2SD ...)
(Cvt64to32F ...) => (CVTSQ2SS ...)
(Cvt64to64F ...) => (CVTSQ2SD ...)
(Cvt32Fto32 ...) => (CVTTSS2SL ...)
(Cvt32Fto64 ...) => (CVTTSS2SQ ...)
(Cvt64Fto32 ...) => (CVTTSD2SL ...)
(Cvt64Fto64 ...) => (CVTTSD2SQ ...)
(Cvt32Fto64F ...) => (CVTSS2SD ...)
(Cvt64Fto32F ...) => (CVTSD2SS ...)
(Round(32|64)F ...) => (Copy ...)
(CvtBoolToUint8 ...) => (Copy ...)

// Lowering shifts
// Unsigned shifts need to return 0 if shift amount is >= width of shifted value.
// result = (arg << shift) & (shift >= argbits ? 0 : 0xffffffffffffffff)
(Lsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHLQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Lsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHLL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Lsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLQ x y)
(Lsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Lsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SHLL x y)
(Rsh64Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDQ (SHRQ <t> x y) (SBBQcarrymask <t> (CMP(Q|L|W|B)const y [64])))
(Rsh32Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRL <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [32])))
(Rsh16Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRW <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [16])))
(Rsh8Ux(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (ANDL (SHRB <t> x y) (SBBLcarrymask <t> (CMP(Q|L|W|B)const y [8])))
(Rsh64Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRQ x y)
(Rsh32Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRL x y)
(Rsh16Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRW x y)
(Rsh8Ux(64|32|16|8) x y) && shiftIsBounded(v) => (SHRB x y)

// Signed right shift needs to return 0/-1 if shift amount is >= width of shifted value.
// We implement this by setting the shift value to -1 (all ones) if the shift value is >= width.
(Rsh64x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARQ <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [64])))))
(Rsh32x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARL <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [32])))))
(Rsh16x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARW <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [16])))))
(Rsh8x(64|32|16|8) <t> x y) && !shiftIsBounded(v) => (SARB <t> x (OR(Q|L|L|L) <y.Type> y (NOT(Q|L|L|L) <y.Type> (SBB(Q|L|L|L)carrymask <y.Type> (CMP(Q|L|W|B)const y [8])))))
(Rsh64x(64|32|16|8) x y) && shiftIsBounded(v) => (SARQ x y)
(Rsh32x(64|32|16|8) x y) && shiftIsBounded(v) => (SARL x y)
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
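The two comments above describe the branch-free masking used for unbounded shifts: compute the shift with the count masked to the register width (what the hardware does anyway), then AND with an all-ones or all-zeros mask derived from shift >= width. A plain Go sketch of the 64-bit left-shift case (shl64 is illustrative only):

package main

import "fmt"

// shl64 computes x << shift with the semantics the rules above lower:
// shifts of 64 or more produce 0. Hardware SHLQ only looks at the low 6
// bits of the count, so an extra mask (SBBQcarrymask on amd64, an if here)
// zeroes the result for out-of-range counts.
func shl64(x, shift uint64) uint64 {
    var mask uint64
    if shift < 64 { // SBBQcarrymask: all ones when the CMP sets carry (shift < 64), else 0
        mask = ^uint64(0)
    }
    return (x << (shift & 63)) & mask
}

func main() {
    fmt.Println(shl64(1, 3), shl64(1, 64)) // 8 0
}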
// Lowering comparisons // Lowering integer comparisons
(Less(64|32|16|8) x y) -> (SETL (CMP(Q|L|W|B) x y)) (Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
(Less(64|32|16|8)U x y) -> (SETB (CMP(Q|L|W|B) x y)) (Less(64|32|16|8)U x y) => (SETB (CMP(Q|L|W|B) x y))
// Use SETGF with reversed operands to dodge NaN case (Leq(64|32|16|8) x y) => (SETLE (CMP(Q|L|W|B) x y))
(Less(32|64)F x y) -> (SETGF (UCOMIS(S|D) y x)) (Leq(64|32|16|8)U x y) => (SETBE (CMP(Q|L|W|B) x y))
(Eq(Ptr|64|32|16|8|B) x y) => (SETEQ (CMP(Q|Q|L|W|B|B) x y))
(Leq(64|32|16|8) x y) -> (SETLE (CMP(Q|L|W|B) x y)) (Neq(Ptr|64|32|16|8|B) x y) => (SETNE (CMP(Q|Q|L|W|B|B) x y))
(Leq(64|32|16|8)U x y) -> (SETBE (CMP(Q|L|W|B) x y))
// Use SETGEF with reversed operands to dodge NaN case
(Leq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) y x))
// Lowering floating point comparisons
// Note Go assembler gets UCOMISx operand order wrong, but it is right here // Note Go assembler gets UCOMISx operand order wrong, but it is right here
// Bug is accommodated at generation of assembly language. // and the operands are reversed when generating assembly language.
(Greater(32|64)F x y) -> (SETGF (UCOMIS(S|D) x y)) (Eq(32|64)F x y) => (SETEQF (UCOMIS(S|D) x y))
(Neq(32|64)F x y) => (SETNEF (UCOMIS(S|D) x y))
// Note Go assembler gets UCOMISx operand order wrong, but it is right here // Use SETGF/SETGEF with reversed operands to dodge NaN case.
// Bug is accommodated at generation of assembly language. (Less(32|64)F x y) => (SETGF (UCOMIS(S|D) y x))
(Geq(32|64)F x y) -> (SETGEF (UCOMIS(S|D) x y)) (Leq(32|64)F x y) => (SETGEF (UCOMIS(S|D) y x))
(Eq(64|32|16|8|B) x y) -> (SETEQ (CMP(Q|L|W|B|B) x y))
(EqPtr x y) -> (SETEQ (CMPQ x y))
(Eq(32|64)F x y) -> (SETEQF (UCOMIS(S|D) x y))
(Neq(64|32|16|8|B) x y) -> (SETNE (CMP(Q|L|W|B|B) x y))
(NeqPtr x y) -> (SETNE (CMPQ x y))
(Neq(32|64)F x y) -> (SETNEF (UCOMIS(S|D) x y))
// Lowering loads
(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVQload ptr mem)
(Load <t> ptr mem) && is32BitInt(t) => (MOVLload ptr mem)
(Load <t> ptr mem) && is16BitInt(t) => (MOVWload ptr mem)
(Load <t> ptr mem) && (t.IsBoolean() || is8BitInt(t)) => (MOVBload ptr mem)
(Load <t> ptr mem) && is32BitFloat(t) => (MOVSSload ptr mem)
(Load <t> ptr mem) && is64BitFloat(t) => (MOVSDload ptr mem)

// Lowering stores
// These more-specific FP versions of Store pattern should come first.
(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (MOVSDstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (MOVSSstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 => (MOVQstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 => (MOVLstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 2 => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
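The Store rules dispatch purely on the type's size, trying the floating-point cases first. The same dispatch written out as a plain Go helper (storeOpFor is hypothetical, for illustration only):

// storeOpFor mirrors the rule order above: FP sizes first, then integer widths.
func storeOpFor(size int64, isFloat bool) string {
    switch {
    case isFloat && size == 8:
        return "MOVSDstore"
    case isFloat && size == 4:
        return "MOVSSstore"
    case size == 8:
        return "MOVQstore"
    case size == 4:
        return "MOVLstore"
    case size == 2:
        return "MOVWstore"
    default:
        return "MOVBstore"
    }
}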
// Lowering moves
(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
(Move [2] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [4] dst src mem) => (MOVLstore dst (MOVLload src mem) mem)
(Move [8] dst src mem) => (MOVQstore dst (MOVQload src mem) mem)
(Move [16] dst src mem) && config.useSSE => (MOVOstore dst (MOVOload src mem) mem)
(Move [16] dst src mem) && !config.useSSE =>
  (MOVQstore [8] dst (MOVQload [8] src mem)
    (MOVQstore dst (MOVQload src mem) mem))
(Move [32] dst src mem) =>
  (Move [16]
    (OffPtr <dst.Type> dst [16])
    (OffPtr <src.Type> src [16])
    (Move [16] dst src mem))
(Move [48] dst src mem) && config.useSSE =>
  (Move [32]
    (OffPtr <dst.Type> dst [16])
    (OffPtr <src.Type> src [16])
    (Move [16] dst src mem))
(Move [64] dst src mem) && config.useSSE =>
  (Move [32]
    (OffPtr <dst.Type> dst [32])
    (OffPtr <src.Type> src [32])
    (Move [32] dst src mem))
(Move [3] dst src mem) =>
  (MOVBstore [2] dst (MOVBload [2] src mem)
    (MOVWstore dst (MOVWload src mem) mem))
(Move [5] dst src mem) =>
  (MOVBstore [4] dst (MOVBload [4] src mem)
    (MOVLstore dst (MOVLload src mem) mem))
(Move [6] dst src mem) =>
  (MOVWstore [4] dst (MOVWload [4] src mem)
    (MOVLstore dst (MOVLload src mem) mem))
(Move [7] dst src mem) =>
  (MOVLstore [3] dst (MOVLload [3] src mem)
    (MOVLstore dst (MOVLload src mem) mem))
(Move [9] dst src mem) =>
  (MOVBstore [8] dst (MOVBload [8] src mem)
    (MOVQstore dst (MOVQload src mem) mem))
(Move [10] dst src mem) =>
  (MOVWstore [8] dst (MOVWload [8] src mem)
    (MOVQstore dst (MOVQload src mem) mem))
(Move [12] dst src mem) =>
  (MOVLstore [8] dst (MOVLload [8] src mem)
    (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem) && s == 11 || s >= 13 && s <= 15 =>
  (MOVQstore [int32(s-8)] dst (MOVQload [int32(s-8)] src mem)
    (MOVQstore dst (MOVQload src mem) mem))
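The s == 11..15 rule covers the tail with two possibly overlapping 8-byte copies, one at offset 0 and one at offset s-8. The same trick in plain Go (move11to15 is illustrative only):

// move11to15 copies s bytes (11 <= s <= 15) using two 8-byte copies;
// the second copy harmlessly rewrites some bytes the first already wrote.
func move11to15(dst, src []byte, s int) {
    copy(dst[0:8], src[0:8])
    copy(dst[s-8:s], src[s-8:s])
}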
// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
  && s > 16 && s%16 != 0 && s%16 <= 8 =>
  (Move [s-s%16]
    (OffPtr <dst.Type> dst [s%16])
    (OffPtr <src.Type> src [s%16])
    (MOVQstore dst (MOVQload src mem) mem))
(Move [s] dst src mem)
  && s > 16 && s%16 != 0 && s%16 > 8 && config.useSSE =>
  (Move [s-s%16]
    (OffPtr <dst.Type> dst [s%16])
    (OffPtr <src.Type> src [s%16])
    (MOVOstore dst (MOVOload src mem) mem))
(Move [s] dst src mem)
  && s > 16 && s%16 != 0 && s%16 > 8 && !config.useSSE =>
  (Move [s-s%16]
    (OffPtr <dst.Type> dst [s%16])
    (OffPtr <src.Type> src [s%16])
@@ -317,81 +308,75 @@
// Medium copying uses a duff device.
(Move [s] dst src mem)
  && s > 64 && s <= 16*64 && s%16 == 0
  && !config.noDuffDevice && logLargeCopy(v, s) =>
  (DUFFCOPY [s] dst src mem)
// Large copying uses REP MOVSQ.
(Move [s] dst src mem) && (s > 16*64 || config.noDuffDevice) && s%8 == 0 && logLargeCopy(v, s) =>
  (REPMOVSQ dst src (MOVQconst [s/8]) mem)
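The DUFFCOPY auxint changes here from a precomputed offset into runtime·duffcopy to the raw byte count; the jump offset is presumably derived later, when assembly is generated. The old encoding, per the magic constants the deleted comment documented (14 bytes of code per 16-byte block, 64 blocks, see src/runtime/duff_amd64.s), amounted to this sketch:

// duffCopyOffset reproduces the old auxint computation: each 16-byte
// copy block of duffcopy is encoded in 14 bytes of instructions, and
// the routine unrolls 64 such blocks.
func duffCopyOffset(s int64) int64 {
    const bytesPerBlock = 14
    const blocks = 64
    return bytesPerBlock * (blocks - s/16)
}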
// Lowering Zero instructions
(Zero [0] _ mem) => mem
(Zero [1] destptr mem) => (MOVBstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [2] destptr mem) => (MOVWstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [4] destptr mem) => (MOVLstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [8] destptr mem) => (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem)
(Zero [3] destptr mem) =>
  (MOVBstoreconst [makeValAndOff32(0,2)] destptr
    (MOVWstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [5] destptr mem) =>
  (MOVBstoreconst [makeValAndOff32(0,4)] destptr
    (MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [6] destptr mem) =>
  (MOVWstoreconst [makeValAndOff32(0,4)] destptr
    (MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [7] destptr mem) =>
  (MOVLstoreconst [makeValAndOff32(0,3)] destptr
    (MOVLstoreconst [makeValAndOff32(0,0)] destptr mem))
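makeValAndOff32 packs the constant to store and the store offset into a single auxint. A sketch of the packing, consistent with the Val()/Off() accessors used later in this CL (simplified from cmd/compile/internal/ssa):

// ValAndOff packs a 32-bit value and a 32-bit offset into an int64 auxint.
type ValAndOff int64

func makeValAndOff32(val, off int32) ValAndOff {
    return ValAndOff(int64(val)<<32 + int64(uint32(off)))
}

func (x ValAndOff) Val() int64 { return int64(x) >> 32 }  // high 32 bits
func (x ValAndOff) Off() int64 { return int64(int32(x)) } // low 32 bits, sign-extended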
// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && !config.useSSE =>
  (Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
    (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))

// Zero small numbers of words directly.
(Zero [16] destptr mem) && !config.useSSE =>
  (MOVQstoreconst [makeValAndOff32(0,8)] destptr
    (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [24] destptr mem) && !config.useSSE =>
  (MOVQstoreconst [makeValAndOff32(0,16)] destptr
    (MOVQstoreconst [makeValAndOff32(0,8)] destptr
      (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem)))
(Zero [32] destptr mem) && !config.useSSE =>
  (MOVQstoreconst [makeValAndOff32(0,24)] destptr
    (MOVQstoreconst [makeValAndOff32(0,16)] destptr
      (MOVQstoreconst [makeValAndOff32(0,8)] destptr
        (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))))
(Zero [s] destptr mem) && s > 8 && s < 16 && config.useSSE =>
  (MOVQstoreconst [makeValAndOff32(0,int32(s-8))] destptr
    (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
// Adjust zeros to be a multiple of 16 bytes.
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 > 8 && config.useSSE =>
  (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
    (MOVOstore destptr (MOVOconst [0]) mem))
(Zero [s] destptr mem) && s%16 != 0 && s > 16 && s%16 <= 8 && config.useSSE =>
  (Zero [s-s%16] (OffPtr <destptr.Type> destptr [s%16])
    (MOVQstoreconst [makeValAndOff32(0,0)] destptr mem))
(Zero [16] destptr mem) && config.useSSE =>
  (MOVOstore destptr (MOVOconst [0]) mem)
(Zero [32] destptr mem) && config.useSSE =>
  (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
    (MOVOstore destptr (MOVOconst [0]) mem))
(Zero [48] destptr mem) && config.useSSE =>
  (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
    (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
      (MOVOstore destptr (MOVOconst [0]) mem)))
(Zero [64] destptr mem) && config.useSSE =>
  (MOVOstore (OffPtr <destptr.Type> destptr [48]) (MOVOconst [0])
    (MOVOstore (OffPtr <destptr.Type> destptr [32]) (MOVOconst [0])
      (MOVOstore (OffPtr <destptr.Type> destptr [16]) (MOVOconst [0])
@@ -399,24 +384,24 @@
// Medium zeroing uses a duff device.
(Zero [s] destptr mem)
  && s > 64 && s <= 1024 && s%16 == 0 && !config.noDuffDevice =>
  (DUFFZERO [s] destptr (MOVOconst [0]) mem)

// Large zeroing uses REP STOSQ.
(Zero [s] destptr mem)
  && (s > 1024 || (config.noDuffDevice && s > 64 || !config.useSSE && s > 32))
  && s%8 == 0 =>
  (REPSTOSQ destptr (MOVQconst [s/8]) (MOVQconst [0]) mem)

// Lowering constants
(Const8 [c]) => (MOVLconst [int32(c)])
(Const16 [c]) => (MOVLconst [int32(c)])
(Const32 ...) => (MOVLconst ...)
(Const64 ...) => (MOVQconst ...)
(Const32F ...) => (MOVSSconst ...)
(Const64F ...) => (MOVSDconst ...)
(ConstNil) => (MOVQconst [0])
(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
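With typed auxints, ConstBool now carries a real bool, so it goes through b2i to become the 0/1 the MOVLconst auxint needs. b2i is a one-liner along these lines (a sketch of the rewrite helper):

// b2i converts a bool to the 0 or 1 value used as an integer auxint.
func b2i(b bool) int64 {
    if b {
        return 1
    }
    return 0
}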
// Lowering calls
(StaticCall ...) -> (CALLstatic ...)

@@ -478,6 +463,8 @@
(GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
(GetCallerPC ...) -> (LoweredGetCallerPC ...)
(GetCallerSP ...) -> (LoweredGetCallerSP ...)

(HasCPUFeature {s}) -> (SETNE (CMPQconst [0] (LoweredHasCPUFeature {s})))

(Addr ...) -> (LEAQ ...)
(LocalAddr {sym} base _) -> (LEAQ {sym} base)
@@ -578,6 +565,10 @@
(SETB (TEST(Q|L|W|B) x x)) -> (ConstBool [0])
(SETAE (TEST(Q|L|W|B) x x)) -> (ConstBool [1])
// x & 1 != 0 -> x & 1
(SETNE (TEST(B|W)const [1] x)) => (AND(L|L)const [1] x)
(SETB (BT(L|Q)const [0] x)) => (AND(L|Q)const [1] x)
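These rules use the identity that the boolean "x&1 != 0", materialized as 0 or 1, is exactly x&1, so the flag-setting TEST/BT plus SETcc pair collapses to a single ANDconst. In Go terms (illustrative):

// For any x, (x&1 != 0) materialized as 0 or 1 equals x & 1.
func lowBitAsBool(x uint64) uint64 {
    if x&1 != 0 {
        return 1 // same value as x & 1
    }
    return 0
}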
// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
@@ -2153,11 +2144,14 @@
(CMP(Q|L|W|B) l:(MOV(Q|L|W|B)load {sym} [off] ptr mem) x) && canMergeLoad(v, l) && clobber(l) -> (CMP(Q|L|W|B)load {sym} [off] ptr x mem)
(CMP(Q|L|W|B) x l:(MOV(Q|L|W|B)load {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) -> (InvertFlags (CMP(Q|L|W|B)load {sym} [off] ptr x mem))

(CMP(Q|L)const l:(MOV(Q|L)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
  && clobber(l) =>
  @l.Block (CMP(Q|L)constload {sym} [makeValAndOff32(c,off)] ptr mem)
(CMP(W|B)const l:(MOV(W|B)load {sym} [off] ptr mem) [c])
  && l.Uses == 1
  && clobber(l) =>
  @l.Block (CMP(W|B)constload {sym} [makeValAndOff32(int32(c),off)] ptr mem)

(CMPQload {sym} [off] ptr (MOVQconst [c]) mem) && validValAndOff(c,off) -> (CMPQconstload {sym} [makeValAndOff(c,off)] ptr mem)
(CMPLload {sym} [off] ptr (MOVLconst [c]) mem) && validValAndOff(c,off) -> (CMPLconstload {sym} [makeValAndOff(c,off)] ptr mem)
@@ -136,10 +136,11 @@ func init() {
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}

gpload      = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load    = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx   = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly}

gp21pax = regInfo{inputs: []regMask{gp &^ ax, gp}, outputs: []regMask{gp &^ ax}, clobbers: ax}

gpstore      = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
@@ -409,6 +410,32 @@ func init() {
{name: "XORQload", argLength: 3, reg: gp21load, asm: "XORQ", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "ADDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
{name: "ADDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "ADDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ADDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ADDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "SUBLloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
{name: "SUBLloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "SUBQloadidx1", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "SUBQloadidx8", argLength: 4, reg: gp21loadidx, asm: "SUBQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "ANDLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
{name: "ANDLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "ANDQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ANDQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ANDQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "ORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
{name: "ORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "ORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "ORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "ORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "XORLloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 4, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+4*arg2+auxint+aux, arg3 = mem
{name: "XORLloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORL", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
{name: "XORQloadidx1", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 1, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+ arg2+auxint+aux, arg3 = mem
{name: "XORQloadidx8", argLength: 4, reg: gp21loadidx, asm: "XORQ", scale: 8, aux: "SymOff", resultInArg0: true, clobberFlags: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+8*arg2+auxint+aux, arg3 = mem
// direct binary-op on memory (read-modify-write)
{name: "ADDQmodify", argLength: 3, reg: gpstore, asm: "ADDQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) += arg1, arg2=mem
{name: "SUBQmodify", argLength: 3, reg: gpstore, asm: "SUBQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) -= arg1, arg2=mem
@@ -421,6 +448,53 @@ func init() {
{name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem
{name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
{name: "ADDQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ADDQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) += arg2, arg3=mem
{name: "ADDQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ADDQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) += arg2, arg3=mem
{name: "SUBQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "SUBQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) -= arg2, arg3=mem
{name: "SUBQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "SUBQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) -= arg2, arg3=mem
{name: "ANDQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ANDQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) &= arg2, arg3=mem
{name: "ANDQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ANDQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) &= arg2, arg3=mem
{name: "ORQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ORQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) |= arg2, arg3=mem
{name: "ORQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ORQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) |= arg2, arg3=mem
{name: "XORQmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "XORQ", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) ^= arg2, arg3=mem
{name: "XORQmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "XORQ", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) ^= arg2, arg3=mem
{name: "ADDLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) += arg2, arg3=mem
{name: "ADDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+auxint+aux) += arg2, arg3=mem
{name: "ADDLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ADDL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) += arg2, arg3=mem
{name: "SUBLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) -= arg2, arg3=mem
{name: "SUBLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+auxint+aux) -= arg2, arg3=mem
{name: "SUBLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "SUBL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) -= arg2, arg3=mem
{name: "ANDLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) &= arg2, arg3=mem
{name: "ANDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+auxint+aux) &= arg2, arg3=mem
{name: "ANDLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ANDL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) &= arg2, arg3=mem
{name: "ORLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) |= arg2, arg3=mem
{name: "ORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+auxint+aux) |= arg2, arg3=mem
{name: "ORLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "ORL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) |= arg2, arg3=mem
{name: "XORLmodifyidx1", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 1, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+auxint+aux) ^= arg2, arg3=mem
{name: "XORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 4, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+auxint+aux) ^= arg2, arg3=mem
{name: "XORLmodifyidx8", argLength: 4, reg: gpstoreidx, asm: "XORL", scale: 8, aux: "SymOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+auxint+aux) ^= arg2, arg3=mem
{name: "ADDQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ADDQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) += ValAndOff(AuxInt).Val(), arg2=mem
{name: "ADDQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ADDQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) += ValAndOff(AuxInt).Val(), arg2=mem
{name: "ANDQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ANDQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) &= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ANDQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ANDQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) &= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ORQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ORQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) |= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ORQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ORQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) |= ValAndOff(AuxInt).Val(), arg2=mem
{name: "XORQconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "XORQ", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) ^= ValAndOff(AuxInt).Val(), arg2=mem
{name: "XORQconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "XORQ", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) ^= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ADDLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) += ValAndOff(AuxInt).Val(), arg2=mem
{name: "ADDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+ValAndOff(AuxInt).Off()+aux) += ValAndOff(AuxInt).Val(), arg2=mem
{name: "ADDLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) += ValAndOff(AuxInt).Val(), arg2=mem
{name: "ANDLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) &= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ANDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+ValAndOff(AuxInt).Off()+aux) &= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ANDLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) &= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ORLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) |= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+ValAndOff(AuxInt).Off()+aux) |= ValAndOff(AuxInt).Val(), arg2=mem
{name: "ORLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "ORL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) |= ValAndOff(AuxInt).Val(), arg2=mem
{name: "XORLconstmodifyidx1", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 1, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+1*arg1+ValAndOff(AuxInt).Off()+aux) ^= ValAndOff(AuxInt).Val(), arg2=mem
{name: "XORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 4, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+4*arg1+ValAndOff(AuxInt).Off()+aux) ^= ValAndOff(AuxInt).Val(), arg2=mem
{name: "XORLconstmodifyidx8", argLength: 3, reg: gpstoreconstidx, asm: "XORL", scale: 8, aux: "SymValAndOff", typ: "Mem", clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+8*arg1+ValAndOff(AuxInt).Off()+aux) ^= ValAndOff(AuxInt).Val(), arg2=mem
// unary ops
{name: "NEGQ", argLength: 1, reg: gp11, asm: "NEGQ", resultInArg0: true, clobberFlags: true}, // -arg0
{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
@@ -681,7 +755,7 @@ func init() {
// arg0 = destination pointer
// arg1 = source pointer
// arg2 = mem
// auxint = # of bytes to copy, must be multiple of 16
// returns memory
{
    name: "DUFFCOPY",
@@ -738,12 +812,14 @@ func init() {
// It saves all GP registers if necessary, but may clobber others.
{name: "LoweredWB", argLength: 3, reg: regInfo{inputs: []regMask{buildReg("DI"), buildReg("AX CX DX BX BP SI R8 R9")}, clobbers: callerSave &^ gp}, clobberFlags: true, aux: "Sym", symEffect: "None"},

{name: "LoweredHasCPUFeature", argLength: 0, reg: gp01, rematerializeable: true, typ: "UInt64", aux: "Sym", symEffect: "None"},

// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{dx, bx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{cx, dx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{ax, cx}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the
@@ -2,25 +2,44 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file contains rules used by flagalloc and addressingmodes to
// split a flag-generating merged load op into separate load and op.
// Unlike with the other rules files, not all of these
// rules will be applied to all values.
// Rather, flagalloc will request for rules to be applied
// to a particular problematic value.
// These are often the exact inverse of rules in AMD64.rules,
// only with the conditions removed.
//
// For addressingmodes, certain single instructions are slower than the two instruction
// split generated here (which is different from the inputs to addressingmodes).
// For example:
// (CMPBconstload c (ADDQ x y)) -> (CMPBconstloadidx1 c x y) -> (CMPB c (MOVBloadidx1 x y))
(CMP(Q|L|W|B)load {sym} [off] ptr x mem) => (CMP(Q|L|W|B) (MOV(Q|L|W|B)load {sym} [off] ptr mem) x)

(CMP(Q|L|W|B)constload {sym} [vo] ptr mem) && vo.Val() == 0 => (TEST(Q|L|W|B) x:(MOV(Q|L|W|B)load {sym} [vo.Off32()] ptr mem) x)
(CMPQconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPQconst (MOVQload {sym} [vo.Off32()] ptr mem) [vo.Val32()])
(CMPLconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPLconst (MOVLload {sym} [vo.Off32()] ptr mem) [vo.Val32()])
(CMPWconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPWconst (MOVWload {sym} [vo.Off32()] ptr mem) [vo.Val16()])
(CMPBconstload {sym} [vo] ptr mem) && vo.Val() != 0 => (CMPBconst (MOVBload {sym} [vo.Off32()] ptr mem) [vo.Val8()])

(CMP(Q|L|W|B)loadidx1 {sym} [off] ptr idx x mem) => (CMP(Q|L|W|B) (MOV(Q|L|W|B)loadidx1 {sym} [off] ptr idx mem) x)
(CMPQloadidx8 {sym} [off] ptr idx x mem) => (CMPQ (MOVQloadidx8 {sym} [off] ptr idx mem) x)
(CMPLloadidx4 {sym} [off] ptr idx x mem) => (CMPL (MOVLloadidx4 {sym} [off] ptr idx mem) x)
(CMPWloadidx2 {sym} [off] ptr idx x mem) => (CMPW (MOVWloadidx2 {sym} [off] ptr idx mem) x)
(CMP(Q|L|W|B)constloadidx1 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TEST(Q|L|W|B) x:(MOV(Q|L|W|B)loadidx1 {sym} [vo.Off32()] ptr idx mem) x)
(CMPQconstloadidx8 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTQ x:(MOVQloadidx8 {sym} [vo.Off32()] ptr idx mem) x)
(CMPLconstloadidx4 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTL x:(MOVLloadidx4 {sym} [vo.Off32()] ptr idx mem) x)
(CMPWconstloadidx2 {sym} [vo] ptr idx mem) && vo.Val() == 0 => (TESTW x:(MOVWloadidx2 {sym} [vo.Off32()] ptr idx mem) x)
(CMPQconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPQconst (MOVQloadidx1 {sym} [vo.Off32()] ptr idx mem) [vo.Val32()])
(CMPLconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPLconst (MOVLloadidx1 {sym} [vo.Off32()] ptr idx mem) [vo.Val32()])
(CMPWconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPWconst (MOVWloadidx1 {sym} [vo.Off32()] ptr idx mem) [vo.Val16()])
(CMPBconstloadidx1 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPBconst (MOVBloadidx1 {sym} [vo.Off32()] ptr idx mem) [vo.Val8()])
(CMPQconstloadidx8 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPQconst (MOVQloadidx8 {sym} [vo.Off32()] ptr idx mem) [vo.Val32()])
(CMPLconstloadidx4 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPLconst (MOVLloadidx4 {sym} [vo.Off32()] ptr idx mem) [vo.Val32()])
(CMPWconstloadidx2 {sym} [vo] ptr idx mem) && vo.Val() != 0 => (CMPWconst (MOVWloadidx2 {sym} [vo.Off32()] ptr idx mem) [vo.Val16()])
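When the packed constant is zero, these splits emit a TEST of the loaded value against itself instead of a compare with an immediate: x&x leaves the same zero and sign flags as x-0, and avoids encoding the immediate. The equality case, checked in Go (illustrative):

// Comparing against zero and testing x against itself agree on equality:
// x&x == 0 exactly when x == 0.
func cmpZeroViaTest(x int64) bool {
    return (x&x == 0) == (x == 0) // always true
}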
@@ -243,10 +243,6 @@
(Leq16U x y) -> (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Leq32U x y) -> (LessEqualU (CMP x y))

(OffPtr [off] ptr:(SP)) -> (MOVWaddr [off] ptr)
(OffPtr [off] ptr) -> (ADDconst [off] ptr)
@@ -338,12 +334,12 @@
// 8 and 128 are magic constants, see runtime/mkduff.go
(Move [s] {t} dst src mem)
  && s%4 == 0 && s > 4 && s <= 512
  && t.(*types.Type).Alignment()%4 == 0 && !config.noDuffDevice && logLargeCopy(v, s) ->
  (DUFFCOPY [8 * (128 - s/4)] dst src mem)
// Large move uses a loop
(Move [s] {t} dst src mem)
  && ((s > 512 || config.noDuffDevice) || t.(*types.Type).Alignment()%4 != 0) && logLargeCopy(v, s) ->
  (LoweredMove [t.(*types.Type).Alignment()]
    dst
    src
@@ -351,7 +351,7 @@ func init() {
{name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "ARM64BitField"},

// moves
{name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true}, // 64 bits from auxint
{name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
{name: "FMOVDconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVD", typ: "Float64", rematerializeable: true}, // auxint as 64-bit float
@@ -675,9 +675,9 @@ func init() {
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
}

blocks := []blockData{

@@ -691,12 +691,12 @@ func init() {
{name: "ULE", controls: 1},
{name: "UGT", controls: 1},
{name: "UGE", controls: 1},
{name: "Z", controls: 1},                  // Control == 0 (take a register instead of flags)
{name: "NZ", controls: 1},                 // Control != 0
{name: "ZW", controls: 1},                 // Control == 0, 32-bit
{name: "NZW", controls: 1},                // Control != 0, 32-bit
{name: "TBZ", controls: 1, aux: "Int64"},  // Control & (1 << AuxInt) == 0
{name: "TBNZ", controls: 1, aux: "Int64"}, // Control & (1 << AuxInt) != 0
{name: "FLT", controls: 1},
{name: "FLE", controls: 1},
{name: "FGT", controls: 1},
@@ -542,13 +542,13 @@ func init() {
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
// Extend ops are the same as Bounds ops except the indexes are 64-bit.
{name: "LoweredPanicExtendA", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r2, r3}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendB", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r1, r2}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).
{name: "LoweredPanicExtendC", argLength: 4, aux: "Int64", reg: regInfo{inputs: []regMask{r4, r0, r1}}, typ: "Mem", call: true}, // arg0=idxHi, arg1=idxLo, arg2=len, arg3=mem, returns memory. AuxInt contains report code (see PanicExtend in genericOps.go).

// Constant flag values. For any comparison, there are 5 possible
// outcomes: the three from the signed total order (<,==,>) and the
@ -2,220 +2,217 @@
// Use of this source code is governed by a BSD-style // Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file. // license that can be found in the LICENSE file.
-(Add(Ptr|64|32|16|8) ...) -> (ADDV ...)
+(Add(Ptr|64|32|16|8) ...) => (ADDV ...)
-(Add(32|64)F ...) -> (ADD(F|D) ...)
+(Add(32|64)F ...) => (ADD(F|D) ...)
-(Sub(Ptr|64|32|16|8) ...) -> (SUBV ...)
+(Sub(Ptr|64|32|16|8) ...) => (SUBV ...)
-(Sub(32|64)F ...) -> (SUB(F|D) ...)
+(Sub(32|64)F ...) => (SUB(F|D) ...)
-(Mul(64|32|16|8) x y) -> (Select1 (MULVU x y))
+(Mul(64|32|16|8) x y) => (Select1 (MULVU x y))
-(Mul(32|64)F ...) -> (MUL(F|D) ...)
+(Mul(32|64)F ...) => (MUL(F|D) ...)
-(Mul64uhilo ...) -> (MULVU ...)
+(Mul64uhilo ...) => (MULVU ...)
(Select0 (Mul64uover x y)) -> (Select1 <typ.UInt64> (MULVU x y))
(Select1 (Mul64uover x y)) -> (SGTU <typ.Bool> (Select0 <typ.UInt64> (MULVU x y)) (MOVVconst <typ.UInt64> [0]))
-(Hmul64 x y) -> (Select0 (MULV x y))
+(Hmul64 x y) => (Select0 (MULV x y))
-(Hmul64u x y) -> (Select0 (MULVU x y))
+(Hmul64u x y) => (Select0 (MULVU x y))
-(Hmul32 x y) -> (SRAVconst (Select1 <typ.Int64> (MULV (SignExt32to64 x) (SignExt32to64 y))) [32])
+(Hmul32 x y) => (SRAVconst (Select1 <typ.Int64> (MULV (SignExt32to64 x) (SignExt32to64 y))) [32])
-(Hmul32u x y) -> (SRLVconst (Select1 <typ.UInt64> (MULVU (ZeroExt32to64 x) (ZeroExt32to64 y))) [32])
+(Hmul32u x y) => (SRLVconst (Select1 <typ.UInt64> (MULVU (ZeroExt32to64 x) (ZeroExt32to64 y))) [32])
-(Div64 x y) -> (Select1 (DIVV x y))
+(Div64 x y) => (Select1 (DIVV x y))
-(Div64u x y) -> (Select1 (DIVVU x y))
+(Div64u x y) => (Select1 (DIVVU x y))
-(Div32 x y) -> (Select1 (DIVV (SignExt32to64 x) (SignExt32to64 y)))
+(Div32 x y) => (Select1 (DIVV (SignExt32to64 x) (SignExt32to64 y)))
-(Div32u x y) -> (Select1 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)))
+(Div32u x y) => (Select1 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)))
-(Div16 x y) -> (Select1 (DIVV (SignExt16to64 x) (SignExt16to64 y)))
+(Div16 x y) => (Select1 (DIVV (SignExt16to64 x) (SignExt16to64 y)))
-(Div16u x y) -> (Select1 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Div16u x y) => (Select1 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)))
-(Div8 x y) -> (Select1 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
+(Div8 x y) => (Select1 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
-(Div8u x y) -> (Select1 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
+(Div8u x y) => (Select1 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
-(Div(32|64)F ...) -> (DIV(F|D) ...)
+(Div(32|64)F ...) => (DIV(F|D) ...)
-(Mod64 x y) -> (Select0 (DIVV x y))
+(Mod64 x y) => (Select0 (DIVV x y))
-(Mod64u x y) -> (Select0 (DIVVU x y))
+(Mod64u x y) => (Select0 (DIVVU x y))
-(Mod32 x y) -> (Select0 (DIVV (SignExt32to64 x) (SignExt32to64 y)))
+(Mod32 x y) => (Select0 (DIVV (SignExt32to64 x) (SignExt32to64 y)))
-(Mod32u x y) -> (Select0 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)))
+(Mod32u x y) => (Select0 (DIVVU (ZeroExt32to64 x) (ZeroExt32to64 y)))
-(Mod16 x y) -> (Select0 (DIVV (SignExt16to64 x) (SignExt16to64 y)))
+(Mod16 x y) => (Select0 (DIVV (SignExt16to64 x) (SignExt16to64 y)))
-(Mod16u x y) -> (Select0 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Mod16u x y) => (Select0 (DIVVU (ZeroExt16to64 x) (ZeroExt16to64 y)))
-(Mod8 x y) -> (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
+(Mod8 x y) => (Select0 (DIVV (SignExt8to64 x) (SignExt8to64 y)))
-(Mod8u x y) -> (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
+(Mod8u x y) => (Select0 (DIVVU (ZeroExt8to64 x) (ZeroExt8to64 y)))
// (x + y) / 2 with x>=y -> (x - y) / 2 + y
(Avg64u <t> x y) -> (ADDV (SRLVconst <t> (SUBV <t> x y) [1]) y)
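The comment above records the identity behind the Avg64u lowering: for x >= y, (x+y)/2 equals (x-y)/2 + y, and the right-hand side never overflows uint64 even when x+y would. A minimal Go sketch of the lowered sequence (SRLVconst of SUBV, then ADDV); the helper name is illustrative, not part of the Go sources:

	// avg64u computes (x+y)/2 without overflow, assuming x >= y,
	// exactly as the Avg64u rule lowers it: (x-y)>>1 + y.
	func avg64u(x, y uint64) uint64 {
		return (x-y)>>1 + y
	}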
-(And(64|32|16|8) ...) -> (AND ...)
+(And(64|32|16|8) ...) => (AND ...)
-(Or(64|32|16|8) ...) -> (OR ...)
+(Or(64|32|16|8) ...) => (OR ...)
-(Xor(64|32|16|8) ...) -> (XOR ...)
+(Xor(64|32|16|8) ...) => (XOR ...)
// shifts
// hardware instruction uses only the low 6 bits of the shift
// we compare to 64 to ensure Go semantics for large shifts
-(Lsh64x64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
+(Lsh64x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
-(Lsh64x32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
+(Lsh64x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
-(Lsh64x16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
+(Lsh64x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
-(Lsh64x8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
+(Lsh64x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
-(Lsh32x64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
+(Lsh32x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
-(Lsh32x32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
+(Lsh32x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
-(Lsh32x16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
+(Lsh32x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
-(Lsh32x8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
+(Lsh32x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
-(Lsh16x64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
+(Lsh16x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
-(Lsh16x32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
+(Lsh16x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
-(Lsh16x16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
+(Lsh16x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
-(Lsh16x8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
+(Lsh16x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
-(Lsh8x64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
+(Lsh8x64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SLLV <t> x y))
-(Lsh8x32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
+(Lsh8x32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SLLV <t> x (ZeroExt32to64 y)))
-(Lsh8x16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
+(Lsh8x16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SLLV <t> x (ZeroExt16to64 y)))
-(Lsh8x8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
+(Lsh8x8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SLLV <t> x (ZeroExt8to64 y)))
-(Rsh64Ux64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> x y))
+(Rsh64Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> x y))
-(Rsh64Ux32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> x (ZeroExt32to64 y)))
+(Rsh64Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> x (ZeroExt32to64 y)))
-(Rsh64Ux16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> x (ZeroExt16to64 y)))
+(Rsh64Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> x (ZeroExt16to64 y)))
-(Rsh64Ux8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> x (ZeroExt8to64 y)))
+(Rsh64Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> x (ZeroExt8to64 y)))
-(Rsh32Ux64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt32to64 x) y))
+(Rsh32Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt32to64 x) y))
-(Rsh32Ux32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y)))
+(Rsh32Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt32to64 y)))
-(Rsh32Ux16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y)))
+(Rsh32Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt16to64 y)))
-(Rsh32Ux8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64 y)))
+(Rsh32Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt32to64 x) (ZeroExt8to64 y)))
-(Rsh16Ux64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt16to64 x) y))
+(Rsh16Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt16to64 x) y))
-(Rsh16Ux32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)))
+(Rsh16Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt32to64 y)))
-(Rsh16Ux16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Rsh16Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt16to64 y)))
-(Rsh16Ux8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y)))
+(Rsh16Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt16to64 x) (ZeroExt8to64 y)))
-(Rsh8Ux64 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt8to64 x) y))
+(Rsh8Ux64 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) y)) (SRLV <t> (ZeroExt8to64 x) y))
-(Rsh8Ux32 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y)))
+(Rsh8Ux32 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt32to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt32to64 y)))
-(Rsh8Ux16 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y)))
+(Rsh8Ux16 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt16to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt16to64 y)))
-(Rsh8Ux8 <t> x y) -> (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y)))
+(Rsh8Ux8 <t> x y) => (AND (NEGV <t> (SGTU (MOVVconst <typ.UInt64> [64]) (ZeroExt8to64 y))) (SRLV <t> (ZeroExt8to64 x) (ZeroExt8to64 y)))
-(Rsh64x64 <t> x y) -> (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh64x64 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh64x32 <t> x y) -> (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh64x32 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh64x16 <t> x y) -> (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh64x16 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh64x8 <t> x y) -> (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh64x8 <t> x y) => (SRAV x (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
-(Rsh32x64 <t> x y) -> (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh32x64 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh32x32 <t> x y) -> (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh32x32 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh32x16 <t> x y) -> (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh32x16 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh32x8 <t> x y) -> (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh32x8 <t> x y) => (SRAV (SignExt32to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
-(Rsh16x64 <t> x y) -> (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh16x64 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh16x32 <t> x y) -> (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh16x32 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh16x16 <t> x y) -> (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh16x16 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh16x8 <t> x y) -> (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh16x8 <t> x y) => (SRAV (SignExt16to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
-(Rsh8x64 <t> x y) -> (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
+(Rsh8x64 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU y (MOVVconst <typ.UInt64> [63]))) y))
-(Rsh8x32 <t> x y) -> (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
+(Rsh8x32 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt32to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt32to64 y)))
-(Rsh8x16 <t> x y) -> (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
+(Rsh8x16 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt16to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt16to64 y)))
-(Rsh8x8 <t> x y) -> (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
+(Rsh8x8 <t> x y) => (SRAV (SignExt8to64 x) (OR <t> (NEGV <t> (SGTU (ZeroExt8to64 y) (MOVVconst <typ.UInt64> [63]))) (ZeroExt8to64 y)))
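These rules enforce the comment above: the hardware shifter looks only at the low 6 bits of the shift amount, while Go defines shifts by 64 or more to produce 0 (or, for arithmetic right shifts, the sign bit, which is why the Rsh..x.. rules clamp the amount to 63 via the SGTU/NEGV/OR sequence instead of masking it). A hedged Go model of the unsigned case; the function name is illustrative:

	// lsh64 models the lowered Lsh64x64 sequence: mask is all ones when
	// y < 64 (SGTU then NEGV) and zero otherwise, so oversized shifts
	// yield 0 as Go requires even though SLLV would use only y&63.
	func lsh64(x, y uint64) uint64 {
		var mask uint64
		if y < 64 {
			mask = ^uint64(0)
		}
		return mask & (x << (y & 63))
	}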
// rotates
-(RotateLeft8 <t> x (MOVVconst [c])) -> (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
+(RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
-(RotateLeft16 <t> x (MOVVconst [c])) -> (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15])))
+(RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15])))
-(RotateLeft32 <t> x (MOVVconst [c])) -> (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
+(RotateLeft32 <t> x (MOVVconst [c])) => (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
-(RotateLeft64 <t> x (MOVVconst [c])) -> (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))
+(RotateLeft64 <t> x (MOVVconst [c])) => (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))
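A rotate by constant c decomposes into an OR of two shifts, with both shift counts masked to the operand width so they stay in range (a rotation by a multiple of the width degenerates to x|x = x). The same identity in plain Go, for the 32-bit case:

	// rotl32 mirrors the RotateLeft32 rule: x<<(c&31) | x>>(-c&31).
	func rotl32(x uint32, c uint32) uint32 {
		return x<<(c&31) | x>>(-c&31)
	}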
// unary ops
-(Neg(64|32|16|8) ...) -> (NEGV ...)
+(Neg(64|32|16|8) ...) => (NEGV ...)
-(Neg(32|64)F ...) -> (NEG(F|D) ...)
+(Neg(32|64)F ...) => (NEG(F|D) ...)
-(Com(64|32|16|8) x) -> (NOR (MOVVconst [0]) x)
+(Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x)
-(Sqrt ...) -> (SQRTD ...)
+(Sqrt ...) => (SQRTD ...)
// boolean ops -- booleans are represented with 0=false, 1=true
-(AndB ...) -> (AND ...)
+(AndB ...) => (AND ...)
-(OrB ...) -> (OR ...)
+(OrB ...) => (OR ...)
-(EqB x y) -> (XOR (MOVVconst [1]) (XOR <typ.Bool> x y))
+(EqB x y) => (XOR (MOVVconst [1]) (XOR <typ.Bool> x y))
-(NeqB ...) -> (XOR ...)
+(NeqB ...) => (XOR ...)
-(Not x) -> (XORconst [1] x)
+(Not x) => (XORconst [1] x)
// constants
(Const(64|32|16|8) ...) -> (MOVVconst ...)
(Const(32|64)F ...) -> (MOV(F|D)const ...)
-(ConstNil) -> (MOVVconst [0])
+(ConstNil) => (MOVVconst [0])
(ConstBool ...) -> (MOVVconst ...)
-(Slicemask <t> x) -> (SRAVconst (NEGV <t> x) [63])
+(Slicemask <t> x) => (SRAVconst (NEGV <t> x) [63])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
-(Trunc16to8 ...) -> (Copy ...)
+(Trunc16to8 ...) => (Copy ...)
-(Trunc32to8 ...) -> (Copy ...)
+(Trunc32to8 ...) => (Copy ...)
-(Trunc32to16 ...) -> (Copy ...)
+(Trunc32to16 ...) => (Copy ...)
-(Trunc64to8 ...) -> (Copy ...)
+(Trunc64to8 ...) => (Copy ...)
-(Trunc64to16 ...) -> (Copy ...)
+(Trunc64to16 ...) => (Copy ...)
-(Trunc64to32 ...) -> (Copy ...)
+(Trunc64to32 ...) => (Copy ...)
// Zero-/Sign-extensions
-(ZeroExt8to16 ...) -> (MOVBUreg ...)
+(ZeroExt8to16 ...) => (MOVBUreg ...)
-(ZeroExt8to32 ...) -> (MOVBUreg ...)
+(ZeroExt8to32 ...) => (MOVBUreg ...)
-(ZeroExt16to32 ...) -> (MOVHUreg ...)
+(ZeroExt16to32 ...) => (MOVHUreg ...)
-(ZeroExt8to64 ...) -> (MOVBUreg ...)
+(ZeroExt8to64 ...) => (MOVBUreg ...)
-(ZeroExt16to64 ...) -> (MOVHUreg ...)
+(ZeroExt16to64 ...) => (MOVHUreg ...)
-(ZeroExt32to64 ...) -> (MOVWUreg ...)
+(ZeroExt32to64 ...) => (MOVWUreg ...)
-(SignExt8to16 ...) -> (MOVBreg ...)
+(SignExt8to16 ...) => (MOVBreg ...)
-(SignExt8to32 ...) -> (MOVBreg ...)
+(SignExt8to32 ...) => (MOVBreg ...)
-(SignExt16to32 ...) -> (MOVHreg ...)
+(SignExt16to32 ...) => (MOVHreg ...)
-(SignExt8to64 ...) -> (MOVBreg ...)
+(SignExt8to64 ...) => (MOVBreg ...)
-(SignExt16to64 ...) -> (MOVHreg ...)
+(SignExt16to64 ...) => (MOVHreg ...)
-(SignExt32to64 ...) -> (MOVWreg ...)
+(SignExt32to64 ...) => (MOVWreg ...)
// float <-> int conversion
-(Cvt32to32F ...) -> (MOVWF ...)
+(Cvt32to32F ...) => (MOVWF ...)
-(Cvt32to64F ...) -> (MOVWD ...)
+(Cvt32to64F ...) => (MOVWD ...)
-(Cvt64to32F ...) -> (MOVVF ...)
+(Cvt64to32F ...) => (MOVVF ...)
-(Cvt64to64F ...) -> (MOVVD ...)
+(Cvt64to64F ...) => (MOVVD ...)
-(Cvt32Fto32 ...) -> (TRUNCFW ...)
+(Cvt32Fto32 ...) => (TRUNCFW ...)
-(Cvt64Fto32 ...) -> (TRUNCDW ...)
+(Cvt64Fto32 ...) => (TRUNCDW ...)
-(Cvt32Fto64 ...) -> (TRUNCFV ...)
+(Cvt32Fto64 ...) => (TRUNCFV ...)
-(Cvt64Fto64 ...) -> (TRUNCDV ...)
+(Cvt64Fto64 ...) => (TRUNCDV ...)
-(Cvt32Fto64F ...) -> (MOVFD ...)
+(Cvt32Fto64F ...) => (MOVFD ...)
-(Cvt64Fto32F ...) -> (MOVDF ...)
+(Cvt64Fto32F ...) => (MOVDF ...)
-(CvtBoolToUint8 ...) -> (Copy ...)
+(CvtBoolToUint8 ...) => (Copy ...)
-(Round(32|64)F ...) -> (Copy ...)
+(Round(32|64)F ...) => (Copy ...)
// comparisons
-(Eq8 x y) -> (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
+(Eq8 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)))
-(Eq16 x y) -> (SGTU (MOVVconst [1]) (XOR (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Eq16 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt16to64 x) (ZeroExt16to64 y)))
-(Eq32 x y) -> (SGTU (MOVVconst [1]) (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)))
+(Eq32 x y) => (SGTU (MOVVconst [1]) (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)))
-(Eq64 x y) -> (SGTU (MOVVconst [1]) (XOR x y))
+(Eq64 x y) => (SGTU (MOVVconst [1]) (XOR x y))
-(EqPtr x y) -> (SGTU (MOVVconst [1]) (XOR x y))
+(EqPtr x y) => (SGTU (MOVVconst [1]) (XOR x y))
-(Eq(32|64)F x y) -> (FPFlagTrue (CMPEQ(F|D) x y))
+(Eq(32|64)F x y) => (FPFlagTrue (CMPEQ(F|D) x y))
-(Neq8 x y) -> (SGTU (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)) (MOVVconst [0]))
+(Neq8 x y) => (SGTU (XOR (ZeroExt8to64 x) (ZeroExt8to64 y)) (MOVVconst [0]))
-(Neq16 x y) -> (SGTU (XOR (ZeroExt16to32 x) (ZeroExt16to64 y)) (MOVVconst [0]))
+(Neq16 x y) => (SGTU (XOR (ZeroExt16to32 x) (ZeroExt16to64 y)) (MOVVconst [0]))
-(Neq32 x y) -> (SGTU (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)) (MOVVconst [0]))
+(Neq32 x y) => (SGTU (XOR (ZeroExt32to64 x) (ZeroExt32to64 y)) (MOVVconst [0]))
-(Neq64 x y) -> (SGTU (XOR x y) (MOVVconst [0]))
+(Neq64 x y) => (SGTU (XOR x y) (MOVVconst [0]))
-(NeqPtr x y) -> (SGTU (XOR x y) (MOVVconst [0]))
+(NeqPtr x y) => (SGTU (XOR x y) (MOVVconst [0]))
-(Neq(32|64)F x y) -> (FPFlagFalse (CMPEQ(F|D) x y))
+(Neq(32|64)F x y) => (FPFlagFalse (CMPEQ(F|D) x y))
-(Less8 x y) -> (SGT (SignExt8to64 y) (SignExt8to64 x))
+(Less8 x y) => (SGT (SignExt8to64 y) (SignExt8to64 x))
-(Less16 x y) -> (SGT (SignExt16to64 y) (SignExt16to64 x))
+(Less16 x y) => (SGT (SignExt16to64 y) (SignExt16to64 x))
-(Less32 x y) -> (SGT (SignExt32to64 y) (SignExt32to64 x))
+(Less32 x y) => (SGT (SignExt32to64 y) (SignExt32to64 x))
-(Less64 x y) -> (SGT y x)
+(Less64 x y) => (SGT y x)
-(Less(32|64)F x y) -> (FPFlagTrue (CMPGT(F|D) y x)) // reverse operands to work around NaN
+(Less(32|64)F x y) => (FPFlagTrue (CMPGT(F|D) y x)) // reverse operands to work around NaN
-(Less8U x y) -> (SGTU (ZeroExt8to64 y) (ZeroExt8to64 x))
+(Less8U x y) => (SGTU (ZeroExt8to64 y) (ZeroExt8to64 x))
-(Less16U x y) -> (SGTU (ZeroExt16to64 y) (ZeroExt16to64 x))
+(Less16U x y) => (SGTU (ZeroExt16to64 y) (ZeroExt16to64 x))
-(Less32U x y) -> (SGTU (ZeroExt32to64 y) (ZeroExt32to64 x))
+(Less32U x y) => (SGTU (ZeroExt32to64 y) (ZeroExt32to64 x))
-(Less64U x y) -> (SGTU y x)
+(Less64U x y) => (SGTU y x)
-(Leq8 x y) -> (XOR (MOVVconst [1]) (SGT (SignExt8to64 x) (SignExt8to64 y)))
+(Leq8 x y) => (XOR (MOVVconst [1]) (SGT (SignExt8to64 x) (SignExt8to64 y)))
-(Leq16 x y) -> (XOR (MOVVconst [1]) (SGT (SignExt16to64 x) (SignExt16to64 y)))
+(Leq16 x y) => (XOR (MOVVconst [1]) (SGT (SignExt16to64 x) (SignExt16to64 y)))
-(Leq32 x y) -> (XOR (MOVVconst [1]) (SGT (SignExt32to64 x) (SignExt32to64 y)))
+(Leq32 x y) => (XOR (MOVVconst [1]) (SGT (SignExt32to64 x) (SignExt32to64 y)))
-(Leq64 x y) -> (XOR (MOVVconst [1]) (SGT x y))
+(Leq64 x y) => (XOR (MOVVconst [1]) (SGT x y))
-(Leq(32|64)F x y) -> (FPFlagTrue (CMPGE(F|D) y x)) // reverse operands to work around NaN
+(Leq(32|64)F x y) => (FPFlagTrue (CMPGE(F|D) y x)) // reverse operands to work around NaN
-(Leq8U x y) -> (XOR (MOVVconst [1]) (SGTU (ZeroExt8to64 x) (ZeroExt8to64 y)))
+(Leq8U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt8to64 x) (ZeroExt8to64 y)))
-(Leq16U x y) -> (XOR (MOVVconst [1]) (SGTU (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Leq16U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt16to64 x) (ZeroExt16to64 y)))
-(Leq32U x y) -> (XOR (MOVVconst [1]) (SGTU (ZeroExt32to64 x) (ZeroExt32to64 y)))
+(Leq32U x y) => (XOR (MOVVconst [1]) (SGTU (ZeroExt32to64 x) (ZeroExt32to64 y)))
-(Leq64U x y) -> (XOR (MOVVconst [1]) (SGTU x y))
+(Leq64U x y) => (XOR (MOVVconst [1]) (SGTU x y))
-(Greater(32|64)F x y) -> (FPFlagTrue (CMPGT(F|D) x y))
-(Geq(32|64)F x y) -> (FPFlagTrue (CMPGE(F|D) x y))
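The two "reverse operands" comments are about NaN: Less and Leq must be false whenever either operand is NaN, and MIPS64 only has greater-than style FP compares (CMPGT/CMPGE), so x < y is computed as y > x, a swap that the FP flag semantics already handle correctly for NaN. The dedicated Greater/Geq float rules are deleted here, presumably because the generic Greater/Geq ops were canonicalized into Less/Leq with swapped operands. A quick Go reminder of the required semantics:

	package main

	import (
		"fmt"
		"math"
	)

	func main() {
		n := math.NaN()
		// Every ordered comparison involving NaN is false, in both
		// directions, which is what makes the operand swap legal.
		fmt.Println(n < 1, 1 < n, n <= 1, 1 <= n) // false false false false
	}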
(OffPtr [off] ptr:(SP)) -> (MOVVaddr [off] ptr)
(OffPtr [off] ptr) -> (ADDVconst [off] ptr)
@@ -224,70 +221,70 @@
(LocalAddr {sym} base _) -> (MOVVaddr {sym} base)
// loads
-(Load <t> ptr mem) && t.IsBoolean() -> (MOVBUload ptr mem)
+(Load <t> ptr mem) && t.IsBoolean() => (MOVBUload ptr mem)
-(Load <t> ptr mem) && (is8BitInt(t) && isSigned(t)) -> (MOVBload ptr mem)
+(Load <t> ptr mem) && (is8BitInt(t) && isSigned(t)) => (MOVBload ptr mem)
-(Load <t> ptr mem) && (is8BitInt(t) && !isSigned(t)) -> (MOVBUload ptr mem)
+(Load <t> ptr mem) && (is8BitInt(t) && !isSigned(t)) => (MOVBUload ptr mem)
-(Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) -> (MOVHload ptr mem)
+(Load <t> ptr mem) && (is16BitInt(t) && isSigned(t)) => (MOVHload ptr mem)
-(Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) -> (MOVHUload ptr mem)
+(Load <t> ptr mem) && (is16BitInt(t) && !isSigned(t)) => (MOVHUload ptr mem)
-(Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) -> (MOVWload ptr mem)
+(Load <t> ptr mem) && (is32BitInt(t) && isSigned(t)) => (MOVWload ptr mem)
-(Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) -> (MOVWUload ptr mem)
+(Load <t> ptr mem) && (is32BitInt(t) && !isSigned(t)) => (MOVWUload ptr mem)
-(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) -> (MOVVload ptr mem)
+(Load <t> ptr mem) && (is64BitInt(t) || isPtr(t)) => (MOVVload ptr mem)
-(Load <t> ptr mem) && is32BitFloat(t) -> (MOVFload ptr mem)
+(Load <t> ptr mem) && is32BitFloat(t) => (MOVFload ptr mem)
-(Load <t> ptr mem) && is64BitFloat(t) -> (MOVDload ptr mem)
+(Load <t> ptr mem) && is64BitFloat(t) => (MOVDload ptr mem)
// stores
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 1 -> (MOVBstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 1 => (MOVBstore ptr val mem)
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 2 -> (MOVHstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && !is32BitFloat(val.Type) -> (MOVWstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && !is64BitFloat(val.Type) -> (MOVVstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVVstore ptr val mem)
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 4 && is32BitFloat(val.Type) -> (MOVFstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (MOVFstore ptr val mem)
-(Store {t} ptr val mem) && t.(*types.Type).Size() == 8 && is64BitFloat(val.Type) -> (MOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (MOVDstore ptr val mem)
// zeroing
-(Zero [0] _ mem) -> mem
+(Zero [0] _ mem) => mem
-(Zero [1] ptr mem) -> (MOVBstore ptr (MOVVconst [0]) mem)
+(Zero [1] ptr mem) => (MOVBstore ptr (MOVVconst [0]) mem)
-(Zero [2] {t} ptr mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore ptr (MOVVconst [0]) mem)
-(Zero [2] ptr mem) ->
+(Zero [2] ptr mem) =>
	(MOVBstore [1] ptr (MOVVconst [0])
	(MOVBstore [0] ptr (MOVVconst [0]) mem))
-(Zero [4] {t} ptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore ptr (MOVVconst [0]) mem)
-(Zero [4] {t} ptr mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [2] ptr (MOVVconst [0])
	(MOVHstore [0] ptr (MOVVconst [0]) mem))
-(Zero [4] ptr mem) ->
+(Zero [4] ptr mem) =>
	(MOVBstore [3] ptr (MOVVconst [0])
	(MOVBstore [2] ptr (MOVVconst [0])
	(MOVBstore [1] ptr (MOVVconst [0])
	(MOVBstore [0] ptr (MOVVconst [0]) mem))))
-(Zero [8] {t} ptr mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVVstore ptr (MOVVconst [0]) mem)
-(Zero [8] {t} ptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [4] ptr (MOVVconst [0])
	(MOVWstore [0] ptr (MOVVconst [0]) mem))
-(Zero [8] {t} ptr mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [6] ptr (MOVVconst [0])
	(MOVHstore [4] ptr (MOVVconst [0])
	(MOVHstore [2] ptr (MOVVconst [0])
	(MOVHstore [0] ptr (MOVVconst [0]) mem))))
-(Zero [3] ptr mem) ->
+(Zero [3] ptr mem) =>
	(MOVBstore [2] ptr (MOVVconst [0])
	(MOVBstore [1] ptr (MOVVconst [0])
	(MOVBstore [0] ptr (MOVVconst [0]) mem)))
-(Zero [6] {t} ptr mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [4] ptr (MOVVconst [0])
	(MOVHstore [2] ptr (MOVVconst [0])
	(MOVHstore [0] ptr (MOVVconst [0]) mem)))
-(Zero [12] {t} ptr mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [8] ptr (MOVVconst [0])
	(MOVWstore [4] ptr (MOVVconst [0])
	(MOVWstore [0] ptr (MOVVconst [0]) mem)))
-(Zero [16] {t} ptr mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVVstore [8] ptr (MOVVconst [0])
	(MOVVstore [0] ptr (MOVVconst [0]) mem))
-(Zero [24] {t} ptr mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
	(MOVVstore [16] ptr (MOVVconst [0])
	(MOVVstore [8] ptr (MOVVconst [0])
	(MOVVstore [0] ptr (MOVVconst [0]) mem)))
@@ -296,70 +293,70 @@
// 8, and 128 are magic constants, see runtime/mkduff.go
(Zero [s] {t} ptr mem)
	&& s%8 == 0 && s > 24 && s <= 8*128
-	&& t.(*types.Type).Alignment()%8 == 0 && !config.noDuffDevice ->
+	&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
	(DUFFZERO [8 * (128 - s/8)] ptr mem)
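The DUFFZERO AuxInt is a jump offset into the zeroing routine generated by runtime/mkduff.go: each 8-byte store costs 8 bytes of instructions and the routine unrolls 128 of them, so entering at 8*(128 - s/8) executes exactly s/8 stores. For example, s = 256 enters at 8*(128-32) = 768, leaving the last 32 stores to run. A sketch of the guard and the offset arithmetic; the helper name is illustrative:

	// duffZeroOffset returns the entry offset used by the rule above,
	// assuming 8 bytes of code per 8-byte store and 128 unrolled stores.
	func duffZeroOffset(s int64) int64 {
		if s%8 != 0 || s <= 24 || s > 8*128 {
			panic("DUFFZERO rule does not apply")
		}
		return 8 * (128 - s/8)
	}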
// large or unaligned zeroing uses a loop
(Zero [s] {t} ptr mem)
-	&& (s > 8*128 || config.noDuffDevice) || t.(*types.Type).Alignment()%8 != 0 ->
+	&& (s > 8*128 || config.noDuffDevice) || t.Alignment()%8 != 0 =>
-	(LoweredZero [t.(*types.Type).Alignment()]
+	(LoweredZero [t.Alignment()]
	ptr
-	(ADDVconst <ptr.Type> ptr [s-moveSize(t.(*types.Type).Alignment(), config)])
+	(ADDVconst <ptr.Type> ptr [s-moveSize(t.Alignment(), config)])
	mem)
// moves
-(Move [0] _ _ mem) -> mem
+(Move [0] _ _ mem) => mem
-(Move [1] dst src mem) -> (MOVBstore dst (MOVBload src mem) mem)
+(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
-(Move [2] {t} dst src mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore dst (MOVHload src mem) mem)
-(Move [2] dst src mem) ->
+(Move [2] dst src mem) =>
	(MOVBstore [1] dst (MOVBload [1] src mem)
	(MOVBstore dst (MOVBload src mem) mem))
-(Move [4] {t} dst src mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore dst (MOVWload src mem) mem)
-(Move [4] {t} dst src mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [2] dst (MOVHload [2] src mem)
	(MOVHstore dst (MOVHload src mem) mem))
-(Move [4] dst src mem) ->
+(Move [4] dst src mem) =>
	(MOVBstore [3] dst (MOVBload [3] src mem)
	(MOVBstore [2] dst (MOVBload [2] src mem)
	(MOVBstore [1] dst (MOVBload [1] src mem)
	(MOVBstore dst (MOVBload src mem) mem))))
-(Move [8] {t} dst src mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVVstore dst (MOVVload src mem) mem)
-(Move [8] {t} dst src mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [4] dst (MOVWload [4] src mem)
	(MOVWstore dst (MOVWload src mem) mem))
-(Move [8] {t} dst src mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [6] dst (MOVHload [6] src mem)
	(MOVHstore [4] dst (MOVHload [4] src mem)
	(MOVHstore [2] dst (MOVHload [2] src mem)
	(MOVHstore dst (MOVHload src mem) mem))))
-(Move [3] dst src mem) ->
+(Move [3] dst src mem) =>
	(MOVBstore [2] dst (MOVBload [2] src mem)
	(MOVBstore [1] dst (MOVBload [1] src mem)
	(MOVBstore dst (MOVBload src mem) mem)))
-(Move [6] {t} dst src mem) && t.(*types.Type).Alignment()%2 == 0 ->
+(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 =>
	(MOVHstore [4] dst (MOVHload [4] src mem)
	(MOVHstore [2] dst (MOVHload [2] src mem)
	(MOVHstore dst (MOVHload src mem) mem)))
-(Move [12] {t} dst src mem) && t.(*types.Type).Alignment()%4 == 0 ->
+(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
	(MOVWstore [8] dst (MOVWload [8] src mem)
	(MOVWstore [4] dst (MOVWload [4] src mem)
	(MOVWstore dst (MOVWload src mem) mem)))
-(Move [16] {t} dst src mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVVstore [8] dst (MOVVload [8] src mem)
	(MOVVstore dst (MOVVload src mem) mem))
-(Move [24] {t} dst src mem) && t.(*types.Type).Alignment()%8 == 0 ->
+(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
	(MOVVstore [16] dst (MOVVload [16] src mem)
	(MOVVstore [8] dst (MOVVload [8] src mem)
	(MOVVstore dst (MOVVload src mem) mem)))
// medium move uses a duff device
(Move [s] {t} dst src mem)
-	&& s%8 == 0 && s >= 24 && s <= 8*128 && t.(*types.Type).Alignment()%8 == 0
+	&& s%8 == 0 && s >= 24 && s <= 8*128 && t.Alignment()%8 == 0
-	&& !config.noDuffDevice ->
+	&& !config.noDuffDevice && logLargeCopy(v, s) =>
	(DUFFCOPY [16 * (128 - s/8)] dst src mem)
// 16 and 128 are magic constants. 16 is the number of bytes to encode:
// MOVV (R1), R23
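DUFFCOPY works the same way as DUFFZERO, except each unrolled step is a load/store pair (plus pointer increments) costing 16 bytes of code, hence the offset 16*(128 - s/8); with s = 24 that is 16*125 = 2000, running the last three copy steps. The logLargeCopy(v, s) condition added in this change appears to only record large copies for optimization diagnostics while always holding, so the size classes taking this path should be unchanged.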
@@ -370,17 +367,17 @@
// large or unaligned move uses a loop
(Move [s] {t} dst src mem)
-	&& s > 24 || t.(*types.Type).Alignment()%8 != 0 ->
+	&& s > 24 && logLargeCopy(v, s) || t.Alignment()%8 != 0 =>
-	(LoweredMove [t.(*types.Type).Alignment()]
+	(LoweredMove [t.Alignment()]
	dst
	src
-	(ADDVconst <src.Type> src [s-moveSize(t.(*types.Type).Alignment(), config)])
+	(ADDVconst <src.Type> src [s-moveSize(t.Alignment(), config)])
	mem)
// calls
-(StaticCall ...) -> (CALLstatic ...)
+(StaticCall ...) => (CALLstatic ...)
-(ClosureCall ...) -> (CALLclosure ...)
+(ClosureCall ...) => (CALLclosure ...)
-(InterCall ...) -> (CALLinter ...)
+(InterCall ...) => (CALLinter ...)
// atomic intrinsics
(AtomicLoad8 ...) -> (LoweredAtomicLoad8 ...)
@@ -403,48 +400,48 @@
(AtomicCompareAndSwap64 ...) -> (LoweredAtomicCas64 ...)
// checks
-(NilCheck ...) -> (LoweredNilCheck ...)
+(NilCheck ...) => (LoweredNilCheck ...)
-(IsNonNil ptr) -> (SGTU ptr (MOVVconst [0]))
+(IsNonNil ptr) => (SGTU ptr (MOVVconst [0]))
-(IsInBounds idx len) -> (SGTU len idx)
+(IsInBounds idx len) => (SGTU len idx)
-(IsSliceInBounds idx len) -> (XOR (MOVVconst [1]) (SGTU idx len))
+(IsSliceInBounds idx len) => (XOR (MOVVconst [1]) (SGTU idx len))
// pseudo-ops
-(GetClosurePtr ...) -> (LoweredGetClosurePtr ...)
+(GetClosurePtr ...) => (LoweredGetClosurePtr ...)
-(GetCallerSP ...) -> (LoweredGetCallerSP ...)
+(GetCallerSP ...) => (LoweredGetCallerSP ...)
-(GetCallerPC ...) -> (LoweredGetCallerPC ...)
+(GetCallerPC ...) => (LoweredGetCallerPC ...)
-(If cond yes no) -> (NE cond yes no)
+(If cond yes no) => (NE cond yes no)
// Write barrier.
-(WB ...) -> (LoweredWB ...)
+(WB ...) => (LoweredWB ...)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 -> (LoweredPanicBoundsA [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 -> (LoweredPanicBoundsB [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
-(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 -> (LoweredPanicBoundsC [kind] x y mem)
+(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
// Optimizations
// Absorb boolean tests into block
-(NE (FPFlagTrue cmp) yes no) -> (FPT cmp yes no)
+(NE (FPFlagTrue cmp) yes no) => (FPT cmp yes no)
-(NE (FPFlagFalse cmp) yes no) -> (FPF cmp yes no)
+(NE (FPFlagFalse cmp) yes no) => (FPF cmp yes no)
-(EQ (FPFlagTrue cmp) yes no) -> (FPF cmp yes no)
+(EQ (FPFlagTrue cmp) yes no) => (FPF cmp yes no)
-(EQ (FPFlagFalse cmp) yes no) -> (FPT cmp yes no)
+(EQ (FPFlagFalse cmp) yes no) => (FPT cmp yes no)
-(NE (XORconst [1] cmp:(SGT _ _)) yes no) -> (EQ cmp yes no)
+(NE (XORconst [1] cmp:(SGT _ _)) yes no) => (EQ cmp yes no)
-(NE (XORconst [1] cmp:(SGTU _ _)) yes no) -> (EQ cmp yes no)
+(NE (XORconst [1] cmp:(SGTU _ _)) yes no) => (EQ cmp yes no)
-(NE (XORconst [1] cmp:(SGTconst _)) yes no) -> (EQ cmp yes no)
+(NE (XORconst [1] cmp:(SGTconst _)) yes no) => (EQ cmp yes no)
-(NE (XORconst [1] cmp:(SGTUconst _)) yes no) -> (EQ cmp yes no)
+(NE (XORconst [1] cmp:(SGTUconst _)) yes no) => (EQ cmp yes no)
-(EQ (XORconst [1] cmp:(SGT _ _)) yes no) -> (NE cmp yes no)
+(EQ (XORconst [1] cmp:(SGT _ _)) yes no) => (NE cmp yes no)
-(EQ (XORconst [1] cmp:(SGTU _ _)) yes no) -> (NE cmp yes no)
+(EQ (XORconst [1] cmp:(SGTU _ _)) yes no) => (NE cmp yes no)
-(EQ (XORconst [1] cmp:(SGTconst _)) yes no) -> (NE cmp yes no)
+(EQ (XORconst [1] cmp:(SGTconst _)) yes no) => (NE cmp yes no)
-(EQ (XORconst [1] cmp:(SGTUconst _)) yes no) -> (NE cmp yes no)
+(EQ (XORconst [1] cmp:(SGTUconst _)) yes no) => (NE cmp yes no)
-(NE (SGTUconst [1] x) yes no) -> (EQ x yes no)
+(NE (SGTUconst [1] x) yes no) => (EQ x yes no)
-(EQ (SGTUconst [1] x) yes no) -> (NE x yes no)
+(EQ (SGTUconst [1] x) yes no) => (NE x yes no)
-(NE (SGTU x (MOVVconst [0])) yes no) -> (NE x yes no)
+(NE (SGTU x (MOVVconst [0])) yes no) => (NE x yes no)
-(EQ (SGTU x (MOVVconst [0])) yes no) -> (EQ x yes no)
+(EQ (SGTU x (MOVVconst [0])) yes no) => (EQ x yes no)
-(NE (SGTconst [0] x) yes no) -> (LTZ x yes no)
+(NE (SGTconst [0] x) yes no) => (LTZ x yes no)
-(EQ (SGTconst [0] x) yes no) -> (GEZ x yes no)
+(EQ (SGTconst [0] x) yes no) => (GEZ x yes no)
-(NE (SGT x (MOVVconst [0])) yes no) -> (GTZ x yes no)
+(NE (SGT x (MOVVconst [0])) yes no) => (GTZ x yes no)
-(EQ (SGT x (MOVVconst [0])) yes no) -> (LEZ x yes no)
+(EQ (SGT x (MOVVconst [0])) yes no) => (LEZ x yes no)
// fold offset into address
(ADDVconst [off1] (MOVVaddr [off2] {sym} ptr)) -> (MOVVaddr [off1+off2] {sym} ptr)
@@ -512,178 +509,178 @@
	(MOVVstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// store zero
-(MOVBstore [off] {sym} ptr (MOVVconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
+(MOVBstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
-(MOVHstore [off] {sym} ptr (MOVVconst [0]) mem) -> (MOVHstorezero [off] {sym} ptr mem)
+(MOVHstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVHstorezero [off] {sym} ptr mem)
-(MOVWstore [off] {sym} ptr (MOVVconst [0]) mem) -> (MOVWstorezero [off] {sym} ptr mem)
+(MOVWstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
-(MOVVstore [off] {sym} ptr (MOVVconst [0]) mem) -> (MOVVstorezero [off] {sym} ptr mem)
+(MOVVstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVVstorezero [off] {sym} ptr mem)
// don't extend after proper load
-(MOVBreg x:(MOVBload _ _)) -> (MOVVreg x)
+(MOVBreg x:(MOVBload _ _)) => (MOVVreg x)
-(MOVBUreg x:(MOVBUload _ _)) -> (MOVVreg x)
+(MOVBUreg x:(MOVBUload _ _)) => (MOVVreg x)
-(MOVHreg x:(MOVBload _ _)) -> (MOVVreg x)
+(MOVHreg x:(MOVBload _ _)) => (MOVVreg x)
-(MOVHreg x:(MOVBUload _ _)) -> (MOVVreg x)
+(MOVHreg x:(MOVBUload _ _)) => (MOVVreg x)
-(MOVHreg x:(MOVHload _ _)) -> (MOVVreg x)
+(MOVHreg x:(MOVHload _ _)) => (MOVVreg x)
-(MOVHUreg x:(MOVBUload _ _)) -> (MOVVreg x)
+(MOVHUreg x:(MOVBUload _ _)) => (MOVVreg x)
-(MOVHUreg x:(MOVHUload _ _)) -> (MOVVreg x)
+(MOVHUreg x:(MOVHUload _ _)) => (MOVVreg x)
-(MOVWreg x:(MOVBload _ _)) -> (MOVVreg x)
+(MOVWreg x:(MOVBload _ _)) => (MOVVreg x)
-(MOVWreg x:(MOVBUload _ _)) -> (MOVVreg x)
+(MOVWreg x:(MOVBUload _ _)) => (MOVVreg x)
-(MOVWreg x:(MOVHload _ _)) -> (MOVVreg x)
+(MOVWreg x:(MOVHload _ _)) => (MOVVreg x)
-(MOVWreg x:(MOVHUload _ _)) -> (MOVVreg x)
+(MOVWreg x:(MOVHUload _ _)) => (MOVVreg x)
-(MOVWreg x:(MOVWload _ _)) -> (MOVVreg x)
+(MOVWreg x:(MOVWload _ _)) => (MOVVreg x)
-(MOVWUreg x:(MOVBUload _ _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVBUload _ _)) => (MOVVreg x)
-(MOVWUreg x:(MOVHUload _ _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVHUload _ _)) => (MOVVreg x)
-(MOVWUreg x:(MOVWUload _ _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVWUload _ _)) => (MOVVreg x)
// fold double extensions
-(MOVBreg x:(MOVBreg _)) -> (MOVVreg x)
+(MOVBreg x:(MOVBreg _)) => (MOVVreg x)
-(MOVBUreg x:(MOVBUreg _)) -> (MOVVreg x)
+(MOVBUreg x:(MOVBUreg _)) => (MOVVreg x)
-(MOVHreg x:(MOVBreg _)) -> (MOVVreg x)
+(MOVHreg x:(MOVBreg _)) => (MOVVreg x)
-(MOVHreg x:(MOVBUreg _)) -> (MOVVreg x)
+(MOVHreg x:(MOVBUreg _)) => (MOVVreg x)
-(MOVHreg x:(MOVHreg _)) -> (MOVVreg x)
+(MOVHreg x:(MOVHreg _)) => (MOVVreg x)
-(MOVHUreg x:(MOVBUreg _)) -> (MOVVreg x)
+(MOVHUreg x:(MOVBUreg _)) => (MOVVreg x)
-(MOVHUreg x:(MOVHUreg _)) -> (MOVVreg x)
+(MOVHUreg x:(MOVHUreg _)) => (MOVVreg x)
-(MOVWreg x:(MOVBreg _)) -> (MOVVreg x)
+(MOVWreg x:(MOVBreg _)) => (MOVVreg x)
-(MOVWreg x:(MOVBUreg _)) -> (MOVVreg x)
+(MOVWreg x:(MOVBUreg _)) => (MOVVreg x)
-(MOVWreg x:(MOVHreg _)) -> (MOVVreg x)
+(MOVWreg x:(MOVHreg _)) => (MOVVreg x)
-(MOVWreg x:(MOVWreg _)) -> (MOVVreg x)
+(MOVWreg x:(MOVWreg _)) => (MOVVreg x)
-(MOVWUreg x:(MOVBUreg _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVBUreg _)) => (MOVVreg x)
-(MOVWUreg x:(MOVHUreg _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVHUreg _)) => (MOVVreg x)
-(MOVWUreg x:(MOVWUreg _)) -> (MOVVreg x)
+(MOVWUreg x:(MOVWUreg _)) => (MOVVreg x)
// don't extend before store
-(MOVBstore [off] {sym} ptr (MOVBreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVHreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
-(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
// if a register move has only 1 use, just use the same register without emitting instruction
// MOVVnop doesn't emit instruction, only for ensuring the type.
-(MOVVreg x) && x.Uses == 1 -> (MOVVnop x)
+(MOVVreg x) && x.Uses == 1 => (MOVVnop x)
// fold constant into arithmetic ops
-(ADDV x (MOVVconst [c])) && is32Bit(c) -> (ADDVconst [c] x)
+(ADDV x (MOVVconst [c])) && is32Bit(c) => (ADDVconst [c] x)
-(SUBV x (MOVVconst [c])) && is32Bit(c) -> (SUBVconst [c] x)
+(SUBV x (MOVVconst [c])) && is32Bit(c) => (SUBVconst [c] x)
-(AND x (MOVVconst [c])) && is32Bit(c) -> (ANDconst [c] x)
+(AND x (MOVVconst [c])) && is32Bit(c) => (ANDconst [c] x)
-(OR x (MOVVconst [c])) && is32Bit(c) -> (ORconst [c] x)
+(OR x (MOVVconst [c])) && is32Bit(c) => (ORconst [c] x)
-(XOR x (MOVVconst [c])) && is32Bit(c) -> (XORconst [c] x)
+(XOR x (MOVVconst [c])) && is32Bit(c) => (XORconst [c] x)
-(NOR x (MOVVconst [c])) && is32Bit(c) -> (NORconst [c] x)
+(NOR x (MOVVconst [c])) && is32Bit(c) => (NORconst [c] x)
-(SLLV _ (MOVVconst [c])) && uint64(c)>=64 -> (MOVVconst [0])
+(SLLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0])
-(SRLV _ (MOVVconst [c])) && uint64(c)>=64 -> (MOVVconst [0])
+(SRLV _ (MOVVconst [c])) && uint64(c)>=64 => (MOVVconst [0])
-(SRAV x (MOVVconst [c])) && uint64(c)>=64 -> (SRAVconst x [63])
+(SRAV x (MOVVconst [c])) && uint64(c)>=64 => (SRAVconst x [63])
-(SLLV x (MOVVconst [c])) -> (SLLVconst x [c])
+(SLLV x (MOVVconst [c])) => (SLLVconst x [c])
-(SRLV x (MOVVconst [c])) -> (SRLVconst x [c])
+(SRLV x (MOVVconst [c])) => (SRLVconst x [c])
-(SRAV x (MOVVconst [c])) -> (SRAVconst x [c])
+(SRAV x (MOVVconst [c])) => (SRAVconst x [c])
-(SGT (MOVVconst [c]) x) && is32Bit(c) -> (SGTconst [c] x)
+(SGT (MOVVconst [c]) x) && is32Bit(c) => (SGTconst [c] x)
-(SGTU (MOVVconst [c]) x) && is32Bit(c) -> (SGTUconst [c] x)
+(SGTU (MOVVconst [c]) x) && is32Bit(c) => (SGTUconst [c] x)
// mul by constant
-(Select1 (MULVU x (MOVVconst [-1]))) -> (NEGV x)
+(Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x)
-(Select1 (MULVU _ (MOVVconst [0]))) -> (MOVVconst [0])
+(Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0])
-(Select1 (MULVU x (MOVVconst [1]))) -> x
+(Select1 (MULVU x (MOVVconst [1]))) => x
-(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) -> (SLLVconst [log2(c)] x)
+(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SLLVconst [log2(c)] x)
// div by constant
-(Select1 (DIVVU x (MOVVconst [1]))) -> x
+(Select1 (DIVVU x (MOVVconst [1]))) => x
-(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) -> (SRLVconst [log2(c)] x)
+(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SRLVconst [log2(c)] x)
-(Select0 (DIVVU _ (MOVVconst [1]))) -> (MOVVconst [0]) // mod
+(Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod
-(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) -> (ANDconst [c-1] x) // mod
+(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
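The mul- and div-by-constant rules are classic strength reduction: for c = 1<<k, x*c becomes x<<k, unsigned x/c becomes x>>k, and the remainder x%c becomes x&(c-1). A compact Go sketch of the identities (the function name is illustrative):

	// strengthReduce shows the identities behind the isPowerOfTwo rules
	// above, for unsigned x and c = 1<<k.
	func strengthReduce(x uint64, k uint) (mul, div, mod uint64) {
		return x << k, x >> k, x & (1<<k - 1)
	}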
// generic simplifications
-(ADDV x (NEGV y)) -> (SUBV x y)
+(ADDV x (NEGV y)) => (SUBV x y)
-(SUBV x x) -> (MOVVconst [0])
+(SUBV x x) => (MOVVconst [0])
-(SUBV (MOVVconst [0]) x) -> (NEGV x)
+(SUBV (MOVVconst [0]) x) => (NEGV x)
-(AND x x) -> x
+(AND x x) => x
-(OR x x) -> x
+(OR x x) => x
-(XOR x x) -> (MOVVconst [0])
+(XOR x x) => (MOVVconst [0])
// remove redundant *const ops
-(ADDVconst [0] x) -> x
+(ADDVconst [0] x) => x
-(SUBVconst [0] x) -> x
+(SUBVconst [0] x) => x
-(ANDconst [0] _) -> (MOVVconst [0])
+(ANDconst [0] _) => (MOVVconst [0])
-(ANDconst [-1] x) -> x
+(ANDconst [-1] x) => x
-(ORconst [0] x) -> x
+(ORconst [0] x) => x
-(ORconst [-1] _) -> (MOVVconst [-1])
+(ORconst [-1] _) => (MOVVconst [-1])
-(XORconst [0] x) -> x
+(XORconst [0] x) => x
-(XORconst [-1] x) -> (NORconst [0] x)
+(XORconst [-1] x) => (NORconst [0] x)
// generic constant folding
-(ADDVconst [c] (MOVVconst [d])) -> (MOVVconst [c+d])
+(ADDVconst [c] (MOVVconst [d])) => (MOVVconst [c+d])
-(ADDVconst [c] (ADDVconst [d] x)) && is32Bit(c+d) -> (ADDVconst [c+d] x)
+(ADDVconst [c] (ADDVconst [d] x)) && is32Bit(c+d) => (ADDVconst [c+d] x)
-(ADDVconst [c] (SUBVconst [d] x)) && is32Bit(c-d) -> (ADDVconst [c-d] x)
+(ADDVconst [c] (SUBVconst [d] x)) && is32Bit(c-d) => (ADDVconst [c-d] x)
-(SUBVconst [c] (MOVVconst [d])) -> (MOVVconst [d-c])
+(SUBVconst [c] (MOVVconst [d])) => (MOVVconst [d-c])
-(SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) -> (ADDVconst [-c-d] x)
+(SUBVconst [c] (SUBVconst [d] x)) && is32Bit(-c-d) => (ADDVconst [-c-d] x)
-(SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) -> (ADDVconst [-c+d] x)
+(SUBVconst [c] (ADDVconst [d] x)) && is32Bit(-c+d) => (ADDVconst [-c+d] x)
-(SLLVconst [c] (MOVVconst [d])) -> (MOVVconst [d<<uint64(c)])
+(SLLVconst [c] (MOVVconst [d])) => (MOVVconst [d<<uint64(c)])
-(SRLVconst [c] (MOVVconst [d])) -> (MOVVconst [int64(uint64(d)>>uint64(c))])
+(SRLVconst [c] (MOVVconst [d])) => (MOVVconst [int64(uint64(d)>>uint64(c))])
-(SRAVconst [c] (MOVVconst [d])) -> (MOVVconst [d>>uint64(c)])
+(SRAVconst [c] (MOVVconst [d])) => (MOVVconst [d>>uint64(c)])
-(Select1 (MULVU (MOVVconst [c]) (MOVVconst [d]))) -> (MOVVconst [c*d])
+(Select1 (MULVU (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [c*d])
-(Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) -> (MOVVconst [c/d])
+(Select1 (DIVV (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [c/d])
-(Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) -> (MOVVconst [int64(uint64(c)/uint64(d))])
+(Select1 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [int64(uint64(c)/uint64(d))])
-(Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) -> (MOVVconst [c%d]) // mod
+(Select0 (DIVV (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [c%d]) // mod
-(Select0 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) -> (MOVVconst [int64(uint64(c)%uint64(d))]) // mod
+(Select0 (DIVVU (MOVVconst [c]) (MOVVconst [d]))) => (MOVVconst [int64(uint64(c)%uint64(d))]) // mod
-(ANDconst [c] (MOVVconst [d])) -> (MOVVconst [c&d])
+(ANDconst [c] (MOVVconst [d])) => (MOVVconst [c&d])
-(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
+(ANDconst [c] (ANDconst [d] x)) => (ANDconst [c&d] x)
-(ORconst [c] (MOVVconst [d])) -> (MOVVconst [c|d])
+(ORconst [c] (MOVVconst [d])) => (MOVVconst [c|d])
-(ORconst [c] (ORconst [d] x)) && is32Bit(c|d) -> (ORconst [c|d] x)
+(ORconst [c] (ORconst [d] x)) && is32Bit(c|d) => (ORconst [c|d] x)
-(XORconst [c] (MOVVconst [d])) -> (MOVVconst [c^d])
+(XORconst [c] (MOVVconst [d])) => (MOVVconst [c^d])
-(XORconst [c] (XORconst [d] x)) && is32Bit(c^d) -> (XORconst [c^d] x)
+(XORconst [c] (XORconst [d] x)) && is32Bit(c^d) => (XORconst [c^d] x)
-(NORconst [c] (MOVVconst [d])) -> (MOVVconst [^(c|d)])
+(NORconst [c] (MOVVconst [d])) => (MOVVconst [^(c|d)])
-(NEGV (MOVVconst [c])) -> (MOVVconst [-c])
+(NEGV (MOVVconst [c])) => (MOVVconst [-c])
-(MOVBreg (MOVVconst [c])) -> (MOVVconst [int64(int8(c))])
+(MOVBreg (MOVVconst [c])) => (MOVVconst [int64(int8(c))])
-(MOVBUreg (MOVVconst [c])) -> (MOVVconst [int64(uint8(c))])
+(MOVBUreg (MOVVconst [c])) => (MOVVconst [int64(uint8(c))])
-(MOVHreg (MOVVconst [c])) -> (MOVVconst [int64(int16(c))])
+(MOVHreg (MOVVconst [c])) => (MOVVconst [int64(int16(c))])
-(MOVHUreg (MOVVconst [c])) -> (MOVVconst [int64(uint16(c))])
+(MOVHUreg (MOVVconst [c])) => (MOVVconst [int64(uint16(c))])
-(MOVWreg (MOVVconst [c])) -> (MOVVconst [int64(int32(c))])
+(MOVWreg (MOVVconst [c])) => (MOVVconst [int64(int32(c))])
-(MOVWUreg (MOVVconst [c])) -> (MOVVconst [int64(uint32(c))])
+(MOVWUreg (MOVVconst [c])) => (MOVVconst [int64(uint32(c))])
-(MOVVreg (MOVVconst [c])) -> (MOVVconst [c])
+(MOVVreg (MOVVconst [c])) => (MOVVconst [c])
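The extension-of-constant folds mirror Go's integer conversion semantics: truncate the constant to the narrow width with the target signedness, then widen back to 64 bits. For instance, MOVBreg of a constant keeps the low 8 bits and sign-extends, while MOVBUreg zero-extends; a sketch with illustrative names:

	// foldMOVBreg / foldMOVBUreg reproduce the constant folds above.
	func foldMOVBreg(c int64) int64  { return int64(int8(c)) }
	func foldMOVBUreg(c int64) int64 { return int64(uint8(c)) }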
(LoweredAtomicStore32 ptr (MOVVconst [0]) mem) -> (LoweredAtomicStorezero32 ptr mem)
(LoweredAtomicStore64 ptr (MOVVconst [0]) mem) -> (LoweredAtomicStorezero64 ptr mem)
(LoweredAtomicAdd32 ptr (MOVVconst [c]) mem) && is32Bit(c) -> (LoweredAtomicAddconst32 [c] ptr mem)
(LoweredAtomicAdd64 ptr (MOVVconst [c]) mem) && is32Bit(c) -> (LoweredAtomicAddconst64 [c] ptr mem)
// constant comparisons
-(SGTconst [c] (MOVVconst [d])) && c>d -> (MOVVconst [1])
+(SGTconst [c] (MOVVconst [d])) && c>d => (MOVVconst [1])
-(SGTconst [c] (MOVVconst [d])) && c<=d -> (MOVVconst [0])
+(SGTconst [c] (MOVVconst [d])) && c<=d => (MOVVconst [0])
-(SGTUconst [c] (MOVVconst [d])) && uint64(c)>uint64(d) -> (MOVVconst [1])
+(SGTUconst [c] (MOVVconst [d])) && uint64(c)>uint64(d) => (MOVVconst [1])
-(SGTUconst [c] (MOVVconst [d])) && uint64(c)<=uint64(d) -> (MOVVconst [0])
+(SGTUconst [c] (MOVVconst [d])) && uint64(c)<=uint64(d) => (MOVVconst [0])
// other known comparisons
-(SGTconst [c] (MOVBreg _)) && 0x7f < c -> (MOVVconst [1])
+(SGTconst [c] (MOVBreg _)) && 0x7f < c => (MOVVconst [1])
-(SGTconst [c] (MOVBreg _)) && c <= -0x80 -> (MOVVconst [0])
+(SGTconst [c] (MOVBreg _)) && c <= -0x80 => (MOVVconst [0])
-(SGTconst [c] (MOVBUreg _)) && 0xff < c -> (MOVVconst [1])
+(SGTconst [c] (MOVBUreg _)) && 0xff < c => (MOVVconst [1])
-(SGTconst [c] (MOVBUreg _)) && c < 0 -> (MOVVconst [0])
+(SGTconst [c] (MOVBUreg _)) && c < 0 => (MOVVconst [0])
-(SGTUconst [c] (MOVBUreg _)) && 0xff < uint64(c) -> (MOVVconst [1])
+(SGTUconst [c] (MOVBUreg _)) && 0xff < uint64(c) => (MOVVconst [1])
-(SGTconst [c] (MOVHreg _)) && 0x7fff < c -> (MOVVconst [1])
+(SGTconst [c] (MOVHreg _)) && 0x7fff < c => (MOVVconst [1])
-(SGTconst [c] (MOVHreg _)) && c <= -0x8000 -> (MOVVconst [0])
+(SGTconst [c] (MOVHreg _)) && c <= -0x8000 => (MOVVconst [0])
-(SGTconst [c] (MOVHUreg _)) && 0xffff < c -> (MOVVconst [1])
+(SGTconst [c] (MOVHUreg _)) && 0xffff < c => (MOVVconst [1])
-(SGTconst [c] (MOVHUreg _)) && c < 0 -> (MOVVconst [0])
+(SGTconst [c] (MOVHUreg _)) && c < 0 => (MOVVconst [0])
-(SGTUconst [c] (MOVHUreg _)) && 0xffff < uint64(c) -> (MOVVconst [1])
+(SGTUconst [c] (MOVHUreg _)) && 0xffff < uint64(c) => (MOVVconst [1])
-(SGTconst [c] (MOVWUreg _)) && c < 0 -> (MOVVconst [0])
+(SGTconst [c] (MOVWUreg _)) && c < 0 => (MOVVconst [0])
-(SGTconst [c] (ANDconst [m] _)) && 0 <= m && m < c -> (MOVVconst [1])
+(SGTconst [c] (ANDconst [m] _)) && 0 <= m && m < c => (MOVVconst [1])
-(SGTUconst [c] (ANDconst [m] _)) && uint64(m) < uint64(c) -> (MOVVconst [1])
+(SGTUconst [c] (ANDconst [m] _)) && uint64(m) < uint64(c) => (MOVVconst [1])
-(SGTconst [c] (SRLVconst _ [d])) && 0 <= c && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) -> (MOVVconst [1])
+(SGTconst [c] (SRLVconst _ [d])) && 0 <= c && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1])
-(SGTUconst [c] (SRLVconst _ [d])) && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) -> (MOVVconst [1])
+(SGTUconst [c] (SRLVconst _ [d])) && 0 < d && d <= 63 && 0xffffffffffffffff>>uint64(d) < uint64(c) => (MOVVconst [1])
// absorb constants into branches
-(EQ (MOVVconst [0]) yes no) -> (First yes no)
+(EQ (MOVVconst [0]) yes no) => (First yes no)
-(EQ (MOVVconst [c]) yes no) && c != 0 -> (First no yes)
+(EQ (MOVVconst [c]) yes no) && c != 0 => (First no yes)
-(NE (MOVVconst [0]) yes no) -> (First no yes)
+(NE (MOVVconst [0]) yes no) => (First no yes)
-(NE (MOVVconst [c]) yes no) && c != 0 -> (First yes no)
+(NE (MOVVconst [c]) yes no) && c != 0 => (First yes no)
-(LTZ (MOVVconst [c]) yes no) && c < 0 -> (First yes no)
+(LTZ (MOVVconst [c]) yes no) && c < 0 => (First yes no)
-(LTZ (MOVVconst [c]) yes no) && c >= 0 -> (First no yes)
+(LTZ (MOVVconst [c]) yes no) && c >= 0 => (First no yes)
-(LEZ (MOVVconst [c]) yes no) && c <= 0 -> (First yes no)
+(LEZ (MOVVconst [c]) yes no) && c <= 0 => (First yes no)
-(LEZ (MOVVconst [c]) yes no) && c > 0 -> (First no yes)
+(LEZ (MOVVconst [c]) yes no) && c > 0 => (First no yes)
-(GTZ (MOVVconst [c]) yes no) && c > 0 -> (First yes no)
+(GTZ (MOVVconst [c]) yes no) && c > 0 => (First yes no)
-(GTZ (MOVVconst [c]) yes no) && c <= 0 -> (First no yes)
+(GTZ (MOVVconst [c]) yes no) && c <= 0 => (First no yes)
-(GEZ (MOVVconst [c]) yes no) && c >= 0 -> (First yes no)
+(GEZ (MOVVconst [c]) yes no) && c >= 0 => (First yes no)
-(GEZ (MOVVconst [c]) yes no) && c < 0 -> (First no yes)
+(GEZ (MOVVconst [c]) yes no) && c < 0 => (First no yes)

View file

@@ -450,9 +450,9 @@ func init() {
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
-	{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
+	{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r3, r4}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
-	{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
+	{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
-	{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem"}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
+	{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in genericOps.go).
}
blocks := []blockData{

Some files were not shown because too many files have changed in this diff