diff --git a/api/next.txt b/api/next.txt
index 3184a8ceaed..959172242e1 100644
--- a/api/next.txt
+++ b/api/next.txt
@@ -1,3 +1,5 @@
+pkg archive/zip, method (*ReadCloser) Open(string) (fs.File, error)
+pkg archive/zip, method (*Reader) Open(string) (fs.File, error)
pkg debug/elf, const DT_ADDRRNGHI = 1879047935
pkg debug/elf, const DT_ADDRRNGHI DynTag
pkg debug/elf, const DT_ADDRRNGLO = 1879047680
@@ -216,13 +218,25 @@ pkg debug/elf, const PT_SUNWSTACK = 1879048187
pkg debug/elf, const PT_SUNWSTACK ProgType
pkg debug/elf, const PT_SUNW_EH_FRAME = 1685382480
pkg debug/elf, const PT_SUNW_EH_FRAME ProgType
+pkg embed, method (FS) Open(string) (fs.File, error)
+pkg embed, method (FS) ReadDir(string) ([]fs.DirEntry, error)
+pkg embed, method (FS) ReadFile(string) ([]uint8, error)
+pkg embed, type FS struct
pkg flag, func Func(string, string, func(string) error)
pkg flag, method (*FlagSet) Func(string, string, func(string) error)
+pkg go/build, type Package struct, EmbedPatterns []string
pkg go/build, type Package struct, IgnoredOtherFiles []string
+pkg go/build, type Package struct, TestEmbedPatterns []string
+pkg go/build, type Package struct, XTestEmbedPatterns []string
+pkg html/template, func ParseFS(fs.FS, ...string) (*Template, error)
+pkg html/template, method (*Template) ParseFS(fs.FS, ...string) (*Template, error)
+pkg io, func NopCloser(Reader) ReadCloser
+pkg io, func ReadAll(Reader) ([]uint8, error)
pkg io, type ReadSeekCloser interface { Close, Read, Seek }
pkg io, type ReadSeekCloser interface, Close() error
pkg io, type ReadSeekCloser interface, Read([]uint8) (int, error)
pkg io, type ReadSeekCloser interface, Seek(int64, int) (int64, error)
+pkg io, var Discard Writer
pkg io/fs, const ModeAppend = 1073741824
pkg io/fs, const ModeAppend FileMode
pkg io/fs, const ModeCharDevice = 2097152
@@ -253,6 +267,11 @@ pkg io/fs, const ModeTemporary = 268435456
pkg io/fs, const ModeTemporary FileMode
pkg io/fs, const ModeType = 2401763328
pkg io/fs, const ModeType FileMode
+pkg io/fs, func Glob(FS, string) ([]string, error)
+pkg io/fs, func ReadDir(FS, string) ([]DirEntry, error)
+pkg io/fs, func ReadFile(FS, string) ([]uint8, error)
+pkg io/fs, func Stat(FS, string) (FileInfo, error)
+pkg io/fs, func ValidPath(string) bool
pkg io/fs, method (*PathError) Error() string
pkg io/fs, method (*PathError) Timeout() bool
pkg io/fs, method (*PathError) Unwrap() error
@@ -261,6 +280,17 @@ pkg io/fs, method (FileMode) IsRegular() bool
pkg io/fs, method (FileMode) Perm() FileMode
pkg io/fs, method (FileMode) String() string
pkg io/fs, method (FileMode) Type() FileMode
+pkg io/fs, type DirEntry interface { Info, IsDir, Name, Type }
+pkg io/fs, type DirEntry interface, Info() (FileInfo, error)
+pkg io/fs, type DirEntry interface, IsDir() bool
+pkg io/fs, type DirEntry interface, Name() string
+pkg io/fs, type DirEntry interface, Type() FileMode
+pkg io/fs, type FS interface { Open }
+pkg io/fs, type FS interface, Open(string) (File, error)
+pkg io/fs, type File interface { Close, Read, Stat }
+pkg io/fs, type File interface, Close() error
+pkg io/fs, type File interface, Read([]uint8) (int, error)
+pkg io/fs, type File interface, Stat() (FileInfo, error)
pkg io/fs, type FileInfo interface { IsDir, ModTime, Mode, Name, Size, Sys }
pkg io/fs, type FileInfo interface, IsDir() bool
pkg io/fs, type FileInfo interface, ModTime() time.Time
@@ -269,16 +299,35 @@ pkg io/fs, type FileInfo interface, Name() string
pkg io/fs, type FileInfo interface, Size() int64
pkg io/fs, type FileInfo interface, Sys() interface{}
pkg io/fs, type FileMode uint32
+pkg io/fs, type GlobFS interface { Glob, Open }
+pkg io/fs, type GlobFS interface, Glob(string) ([]string, error)
+pkg io/fs, type GlobFS interface, Open(string) (File, error)
pkg io/fs, type PathError struct
pkg io/fs, type PathError struct, Err error
pkg io/fs, type PathError struct, Op string
pkg io/fs, type PathError struct, Path string
+pkg io/fs, type ReadDirFS interface { Open, ReadDir }
+pkg io/fs, type ReadDirFS interface, Open(string) (File, error)
+pkg io/fs, type ReadDirFS interface, ReadDir(string) ([]DirEntry, error)
+pkg io/fs, type ReadDirFile interface { Close, Read, ReadDir, Stat }
+pkg io/fs, type ReadDirFile interface, Close() error
+pkg io/fs, type ReadDirFile interface, Read([]uint8) (int, error)
+pkg io/fs, type ReadDirFile interface, ReadDir(int) ([]DirEntry, error)
+pkg io/fs, type ReadDirFile interface, Stat() (FileInfo, error)
+pkg io/fs, type ReadFileFS interface { Open, ReadFile }
+pkg io/fs, type ReadFileFS interface, Open(string) (File, error)
+pkg io/fs, type ReadFileFS interface, ReadFile(string) ([]uint8, error)
+pkg io/fs, type StatFS interface { Open, Stat }
+pkg io/fs, type StatFS interface, Open(string) (File, error)
+pkg io/fs, type StatFS interface, Stat(string) (FileInfo, error)
pkg io/fs, var ErrClosed error
pkg io/fs, var ErrExist error
pkg io/fs, var ErrInvalid error
pkg io/fs, var ErrNotExist error
pkg io/fs, var ErrPermission error
+pkg log, func Default() *Logger
pkg net, var ErrClosed error
+pkg net/http, func FS(fs.FS) FileSystem
pkg net/http, type Transport struct, GetProxyConnectHeader func(context.Context, *url.URL, string) (Header, error)
pkg os, const ModeAppend fs.FileMode
pkg os, const ModeCharDevice fs.FileMode
@@ -296,6 +345,7 @@ pkg os, const ModeSymlink fs.FileMode
pkg os, const ModeTemporary fs.FileMode
pkg os, const ModeType fs.FileMode
pkg os, func Chmod(string, fs.FileMode) error
+pkg os, func DirFS(string) fs.FS
pkg os, func Lstat(string) (fs.FileInfo, error)
pkg os, func Mkdir(string, fs.FileMode) error
pkg os, func MkdirAll(string, fs.FileMode) error
@@ -303,19 +353,84 @@ pkg os, func OpenFile(string, int, fs.FileMode) (*File, error)
pkg os, func SameFile(fs.FileInfo, fs.FileInfo) bool
pkg os, func Stat(string) (fs.FileInfo, error)
pkg os, method (*File) Chmod(fs.FileMode) error
-pkg os, method (*File) ReadDir(int) ([]DirEntry, error)
+pkg os, method (*File) ReadDir(int) ([]fs.DirEntry, error)
pkg os, method (*File) Readdir(int) ([]fs.FileInfo, error)
pkg os, method (*File) Stat() (fs.FileInfo, error)
-pkg os, type DirEntry interface { Info, IsDir, Name, Type }
-pkg os, type DirEntry interface, Info() (fs.FileInfo, error)
-pkg os, type DirEntry interface, IsDir() bool
-pkg os, type DirEntry interface, Name() string
-pkg os, type DirEntry interface, Type() fs.FileMode
+pkg os, type DirEntry = fs.DirEntry
pkg os, type FileInfo = fs.FileInfo
pkg os, type FileMode = fs.FileMode
pkg os, type PathError = fs.PathError
pkg os/signal, func NotifyContext(context.Context, ...os.Signal) (context.Context, context.CancelFunc)
+pkg runtime/metrics, const KindBad = 0
+pkg runtime/metrics, const KindBad ValueKind
+pkg runtime/metrics, const KindFloat64 = 2
+pkg runtime/metrics, const KindFloat64 ValueKind
+pkg runtime/metrics, const KindFloat64Histogram = 3
+pkg runtime/metrics, const KindFloat64Histogram ValueKind
+pkg runtime/metrics, const KindUint64 = 1
+pkg runtime/metrics, const KindUint64 ValueKind
+pkg runtime/metrics, func All() []Description
+pkg runtime/metrics, func Read([]Sample)
+pkg runtime/metrics, method (Value) Float64() float64
+pkg runtime/metrics, method (Value) Float64Histogram() *Float64Histogram
+pkg runtime/metrics, method (Value) Kind() ValueKind
+pkg runtime/metrics, method (Value) Uint64() uint64
+pkg runtime/metrics, type Description struct
+pkg runtime/metrics, type Description struct, Cumulative bool
+pkg runtime/metrics, type Description struct, Description string
+pkg runtime/metrics, type Description struct, Kind ValueKind
+pkg runtime/metrics, type Description struct, Name string
+pkg runtime/metrics, type Description struct, StopTheWorld bool
+pkg runtime/metrics, type Float64Histogram struct
+pkg runtime/metrics, type Float64Histogram struct, Buckets []float64
+pkg runtime/metrics, type Float64Histogram struct, Counts []uint64
+pkg runtime/metrics, type Sample struct
+pkg runtime/metrics, type Sample struct, Name string
+pkg runtime/metrics, type Sample struct, Value Value
+pkg runtime/metrics, type Value struct
+pkg runtime/metrics, type ValueKind int
+pkg syscall (linux-386), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-386), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-386), func Setegid(int) error
+pkg syscall (linux-386), func Seteuid(int) error
+pkg syscall (linux-386-cgo), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-386-cgo), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-386-cgo), func Setegid(int) error
+pkg syscall (linux-386-cgo), func Seteuid(int) error
+pkg syscall (linux-amd64), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-amd64), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-amd64), func Setegid(int) error
+pkg syscall (linux-amd64), func Seteuid(int) error
+pkg syscall (linux-amd64-cgo), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-amd64-cgo), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-amd64-cgo), func Setegid(int) error
+pkg syscall (linux-amd64-cgo), func Seteuid(int) error
+pkg syscall (linux-arm), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-arm), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-arm), func Setegid(int) error
+pkg syscall (linux-arm), func Seteuid(int) error
+pkg syscall (linux-arm-cgo), func AllThreadsSyscall(uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-arm-cgo), func AllThreadsSyscall6(uintptr, uintptr, uintptr, uintptr, uintptr, uintptr, uintptr) (uintptr, uintptr, Errno)
+pkg syscall (linux-arm-cgo), func Setegid(int) error
+pkg syscall (linux-arm-cgo), func Seteuid(int) error
+pkg syscall (windows-386), func RtlGenRandom(*uint8, uint32) error
+pkg syscall (windows-amd64), func RtlGenRandom(*uint8, uint32) error
+pkg testing/fstest, func TestFS(fs.FS, ...string) error
+pkg testing/fstest, method (MapFS) Glob(string) ([]string, error)
+pkg testing/fstest, method (MapFS) Open(string) (fs.File, error)
+pkg testing/fstest, method (MapFS) ReadDir(string) ([]fs.DirEntry, error)
+pkg testing/fstest, method (MapFS) ReadFile(string) ([]uint8, error)
+pkg testing/fstest, method (MapFS) Stat(string) (fs.FileInfo, error)
+pkg testing/fstest, type MapFS map[string]*MapFile
+pkg testing/fstest, type MapFile struct
+pkg testing/fstest, type MapFile struct, Data []uint8
+pkg testing/fstest, type MapFile struct, ModTime time.Time
+pkg testing/fstest, type MapFile struct, Mode fs.FileMode
+pkg testing/fstest, type MapFile struct, Sys interface{}
pkg testing/iotest, func ErrReader(error) io.Reader
+pkg testing/iotest, func TestReader(io.Reader, []uint8) error
+pkg text/template, func ParseFS(fs.FS, ...string) (*Template, error)
+pkg text/template, method (*Template) ParseFS(fs.FS, ...string) (*Template, error)
pkg text/template/parse, const NodeComment = 20
pkg text/template/parse, const NodeComment NodeType
pkg text/template/parse, const ParseComments = 1
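The entries above are the API additions recorded for this release. As a rough, illustrative sketch (not part of the change itself), the new io/fs, testing/fstest, and os.DirFS pieces listed there compose like this; the file names and contents are made up for the example:

package main

import (
	"fmt"
	"io/fs"
	"log"
	"testing/fstest"
)

func main() {
	// fstest.MapFS is an in-memory fs.FS, convenient for tests.
	fsys := fstest.MapFS{
		"hello.txt":     {Data: []byte("hello, world\n")},
		"sub/other.txt": {Data: []byte("other\n")},
	}

	// fstest.TestFS verifies that fsys is a well-behaved fs.FS
	// and that it contains the named files.
	if err := fstest.TestFS(fsys, "hello.txt", "sub/other.txt"); err != nil {
		log.Fatal(err)
	}

	// fs.ReadFile works against any fs.FS, including os.DirFS(".")
	// or an embedded embed.FS.
	data, err := fs.ReadFile(fsys, "hello.txt")
	if err != nil {
		log.Fatal(err)
	}
	fmt.Print(string(data))
}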
diff --git a/doc/contribute.html b/doc/contribute.html
index 09d43313ff7..f297477fe03 100644
--- a/doc/contribute.html
+++ b/doc/contribute.html
@@ -947,10 +947,18 @@ The Gerrit voting system involves an integer in the range -2 to +2:
+
+At least two maintainers must approve of the change, and at least one
+of those maintainers must +2 the change.
+The second maintainer may cast a vote of Trust+1, meaning that the
+change looks basically OK, but that the maintainer hasn't done the
+detailed review required for a +2 vote.
+
+
Submitting an approved change
-After the code has been +2'ed, an approver will
+After the code has been +2'ed and Trust+1'ed, an approver will
apply it to the master branch using the Gerrit user interface.
This is called "submitting the change".
diff --git a/doc/go1.16.html b/doc/go1.16.html
index 1e73355b690..5d293078861 100644
--- a/doc/go1.16.html
+++ b/doc/go1.16.html
@@ -131,6 +131,16 @@ Do not send CLs removing the interior tags from such phrases.
being built.
+The list command
+
+
+ When the -export flag is specified, the BuildID
+ field is now set to the build ID of the compiled package. This is equivalent
+ to running go tool buildid on
+ go list -export -f {{.Export}},
+ but without the extra step.
+
+
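A hedged sketch of the equivalence described above (not part of this CL; it assumes a Go 1.16 toolchain on PATH and uses the standard fmt package as an arbitrary target):

package main

import (
	"fmt"
	"log"
	"os/exec"
	"strings"
)

func main() {
	// BuildID is only populated when -export is given.
	out, err := exec.Command("go", "list", "-export", "-f", "{{.Export}} {{.BuildID}}", "fmt").Output()
	if err != nil {
		log.Fatal(err)
	}
	fields := strings.Fields(string(out))
	if len(fields) != 2 {
		log.Fatalf("unexpected go list output: %q", out)
	}
	exportFile, listID := fields[0], fields[1]

	// The old two-step way: run `go tool buildid` on the export data file.
	id, err := exec.Command("go", "tool", "buildid", exportFile).Output()
	if err != nil {
		log.Fatal(err)
	}
	// Per the note above, the two IDs should match.
	fmt.Println(listID == strings.TrimSpace(string(id)))
}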
Cgo
@@ -151,6 +161,18 @@ Do not send CLs removing the interior tags from such phrases.
TODO
+
+ On Linux, the runtime now defaults to releasing memory to the
+ operating system promptly (using MADV_DONTNEED), rather
+ than lazily when the operating system is under memory pressure
+ (using MADV_FREE). This means process-level memory
+ statistics like RSS will more accurately reflect the amount of
+ physical memory being used by Go processes. Systems that are
+ currently using GODEBUG=madvdontneed=1 to improve
+ memory monitoring behavior no longer need to set this environment
+ variable.
+
+
Compiler
@@ -182,7 +204,10 @@ Do not send CLs removing the interior tags from such phrases.
TODO: update with final numbers later in the release.
-
+
+ On Windows, go build -buildmode=c-shared now generates Windows
+ ASLR DLLs by default. ASLR can be disabled with --ldflags=-aslr=false.
+
Core library
@@ -251,15 +276,6 @@ Do not send CLs removing the interior tags from such phrases.
On Linux kernel version 4.1 and above, the maximum is now 4294967295.
-
-
-
- For interface types and values, Method,
- MethodByName, and
- NumMethod now
- operate on the interface's exported method set, rather than its full method set.
-
-
diff --git a/doc/go_spec.html b/doc/go_spec.html
index e9e9e421308..676407f6f2b 100644
--- a/doc/go_spec.html
+++ b/doc/go_spec.html
@@ -1,6 +1,6 @@
@@ -3594,23 +3594,33 @@ replaced by its left operand alone.
+var a [1024]byte
var s uint = 33
-var i = 1<<s // 1 has type int
-var j int32 = 1<<s // 1 has type int32; j == 0
-var k = uint64(1<<s) // 1 has type uint64; k == 1<<33
-var m int = 1.0<<s // 1.0 has type int; m == 0 if ints are 32bits in size
-var n = 1.0<<s == j // 1.0 has type int32; n == true
-var o = 1<<s == 2<<s // 1 and 2 have type int; o == true if ints are 32bits in size
-var p = 1<<s == 1<<33 // illegal if ints are 32bits in size: 1 has type int, but 1<<33 overflows int
-var u = 1.0<<s // illegal: 1.0 has type float64, cannot shift
-var u1 = 1.0<<s != 0 // illegal: 1.0 has type float64, cannot shift
-var u2 = 1<<s != 1.0 // illegal: 1 has type float64, cannot shift
-var v float32 = 1<<s // illegal: 1 has type float32, cannot shift
-var w int64 = 1.0<<33 // 1.0<<33 is a constant shift expression
-var x = a[1.0<<s] // 1.0 has type int; x == a[0] if ints are 32bits in size
-var a = make([]byte, 1.0<<s) // 1.0 has type int; len(a) == 0 if ints are 32bits in size
-
+// The results of the following examples are given for 64-bit ints.
+var i = 1<<s // 1 has type int
+var j int32 = 1<<s // 1 has type int32; j == 0
+var k = uint64(1<<s) // 1 has type uint64; k == 1<<33
+var m int = 1.0<<s // 1.0 has type int; m == 1<<33
+var n = 1.0<<s == j // 1.0 has type int32; n == true
+var o = 1<<s == 2<<s // 1 and 2 have type int; o == false
+var p = 1<<s == 1<<33 // 1 has type int; p == true
+var u = 1.0<<s // illegal: 1.0 has type float64, cannot shift
+var u1 = 1.0<<s != 0 // illegal: 1.0 has type float64, cannot shift
+var u2 = 1<<s != 1.0 // illegal: 1 has type float64, cannot shift
+var v float32 = 1<<s // illegal: 1 has type float32, cannot shift
+var w int64 = 1.0<<33 // 1.0<<33 is a constant shift expression; w == 1<<33
+var x = a[1.0<<s] // panics: 1.0 has type int, but 1<<33 overflows array bounds
+var b = make([]byte, 1.0<<s) // 1.0 has type int; len(b) == 1<<33
+
+// The results of the following examples are given for 32-bit ints,
+// which means the shifts will overflow.
+var mm int = 1.0<<s // 1.0 has type int; mm == 0
+var oo = 1<<s == 2<<s // 1 and 2 have type int; oo == true
+var pp = 1<<s == 1<<33 // illegal: 1 has type int, but 1<<33 overflows int
+var xx = a[1.0<<s] // 1.0 has type int; xx == a[0]
+var bb = make([]byte, 1.0<<s) // 1.0 has type int; len(bb) == 0
+
Operator precedence
diff --git a/lib/time/update.bash b/lib/time/update.bash
index 65ef8fb62bc..8eebdf11f4f 100755
--- a/lib/time/update.bash
+++ b/lib/time/update.bash
@@ -8,8 +8,8 @@
# Consult https://www.iana.org/time-zones for the latest versions.
# Versions to use.
-CODE=2020b
-DATA=2020b
+CODE=2020d
+DATA=2020d
set -e
rm -rf work
diff --git a/lib/time/zoneinfo.zip b/lib/time/zoneinfo.zip
index 13034f804ca..fa143a296da 100644
Binary files a/lib/time/zoneinfo.zip and b/lib/time/zoneinfo.zip differ
diff --git a/misc/cgo/test/callback.go b/misc/cgo/test/callback.go
index e7496502931..814888e3ac5 100644
--- a/misc/cgo/test/callback.go
+++ b/misc/cgo/test/callback.go
@@ -181,7 +181,7 @@ func testCallbackCallers(t *testing.T) {
name := []string{
"runtime.cgocallbackg1",
"runtime.cgocallbackg",
- "runtime.cgocallback_gofunc",
+ "runtime.cgocallback",
"runtime.asmcgocall",
"runtime.cgocall",
"test._Cfunc_callback",
diff --git a/misc/cgo/test/cgo_linux_test.go b/misc/cgo/test/cgo_linux_test.go
index 7b56e11a27d..a9746b552ee 100644
--- a/misc/cgo/test/cgo_linux_test.go
+++ b/misc/cgo/test/cgo_linux_test.go
@@ -15,5 +15,6 @@ func TestSetgid(t *testing.T) {
}
testSetgid(t)
}
+func Test1435(t *testing.T) { test1435(t) }
func Test6997(t *testing.T) { test6997(t) }
func TestBuildID(t *testing.T) { testBuildID(t) }
diff --git a/misc/cgo/test/cgo_test.go b/misc/cgo/test/cgo_test.go
index b745a4417f1..f7a76d047bd 100644
--- a/misc/cgo/test/cgo_test.go
+++ b/misc/cgo/test/cgo_test.go
@@ -76,6 +76,8 @@ func TestCheckConst(t *testing.T) { testCheckConst(t) }
func TestConst(t *testing.T) { testConst(t) }
func TestCthread(t *testing.T) { testCthread(t) }
func TestEnum(t *testing.T) { testEnum(t) }
+func TestNamedEnum(t *testing.T) { testNamedEnum(t) }
+func TestCastToEnum(t *testing.T) { testCastToEnum(t) }
func TestErrno(t *testing.T) { testErrno(t) }
func TestFpVar(t *testing.T) { testFpVar(t) }
func TestHelpers(t *testing.T) { testHelpers(t) }
diff --git a/misc/cgo/test/issue1435.go b/misc/cgo/test/issue1435.go
new file mode 100644
index 00000000000..155d33baffe
--- /dev/null
+++ b/misc/cgo/test/issue1435.go
@@ -0,0 +1,152 @@
+// Copyright 2019 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build linux,cgo
+
+package cgotest
+
+import (
+ "fmt"
+ "io/ioutil"
+ "strings"
+ "syscall"
+ "testing"
+)
+
+// #include <stdio.h>
+// #include <stdlib.h>
+// #include <pthread.h>
+// #include <unistd.h>
+// #include <sys/types.h>
+//
+// pthread_t *t = NULL;
+// pthread_mutex_t mu;
+// int nts = 0;
+// int all_done = 0;
+//
+// static void *aFn(void *vargp) {
+// int done = 0;
+// while (!done) {
+// usleep(100);
+// pthread_mutex_lock(&mu);
+// done = all_done;
+// pthread_mutex_unlock(&mu);
+// }
+// return NULL;
+// }
+//
+// void trial(int argc) {
+// int i;
+// nts = argc;
+// t = calloc(nts, sizeof(pthread_t));
+// pthread_mutex_init(&mu, NULL);
+// for (i = 0; i < nts; i++) {
+// pthread_create(&t[i], NULL, aFn, NULL);
+// }
+// }
+//
+// void cleanup(void) {
+// int i;
+// pthread_mutex_lock(&mu);
+// all_done = 1;
+// pthread_mutex_unlock(&mu);
+// for (i = 0; i < nts; i++) {
+// pthread_join(t[i], NULL);
+// }
+// pthread_mutex_destroy(&mu);
+// free(t);
+// }
+import "C"
+
+// compareStatus is used to confirm the contents of the thread
+// specific status files match expectations.
+func compareStatus(filter, expect string) error {
+ expected := filter + "\t" + expect
+ pid := syscall.Getpid()
+ fs, err := ioutil.ReadDir(fmt.Sprintf("/proc/%d/task", pid))
+ if err != nil {
+ return fmt.Errorf("unable to find %d tasks: %v", pid, err)
+ }
+ for _, f := range fs {
+ tf := fmt.Sprintf("/proc/%s/status", f.Name())
+ d, err := ioutil.ReadFile(tf)
+ if err != nil {
+ return fmt.Errorf("unable to read %q: %v", tf, err)
+ }
+ lines := strings.Split(string(d), "\n")
+ for _, line := range lines {
+ if strings.HasPrefix(line, filter) {
+ if line != expected {
+ return fmt.Errorf("%s %s (bad)\n", tf, line)
+ }
+ break
+ }
+ }
+ }
+ return nil
+}
+
+// test1435 tests that the 9 glibc-implemented setuid/gid syscall functions
+// are mapped. This test is a slightly more expansive test than that of
+// src/syscall/syscall_linux_test.go:TestSetuidEtc() insofar as it
+// launches concurrent threads from C code via CGo and validates that
+// they are subject to the system calls being tested. For the actual
+// Go functionality being tested here, the syscall_linux_test version
+// is considered authoritative, but non-trivial improvements to that
+// should be mirrored here.
+func test1435(t *testing.T) {
+ if syscall.Getuid() != 0 {
+ t.Skip("skipping root only test")
+ }
+
+ // Launch some threads in C.
+ const cts = 5
+ C.trial(cts)
+ defer C.cleanup()
+
+ vs := []struct {
+ call string
+ fn func() error
+ filter, expect string
+ }{
+ {call: "Setegid(1)", fn: func() error { return syscall.Setegid(1) }, filter: "Gid:", expect: "0\t1\t0\t1"},
+ {call: "Setegid(0)", fn: func() error { return syscall.Setegid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+
+ {call: "Seteuid(1)", fn: func() error { return syscall.Seteuid(1) }, filter: "Uid:", expect: "0\t1\t0\t1"},
+ {call: "Setuid(0)", fn: func() error { return syscall.Setuid(0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+
+ {call: "Setgid(1)", fn: func() error { return syscall.Setgid(1) }, filter: "Gid:", expect: "1\t1\t1\t1"},
+ {call: "Setgid(0)", fn: func() error { return syscall.Setgid(0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+
+ {call: "Setgroups([]int{0,1,2,3})", fn: func() error { return syscall.Setgroups([]int{0, 1, 2, 3}) }, filter: "Groups:", expect: "0 1 2 3 "},
+ {call: "Setgroups(nil)", fn: func() error { return syscall.Setgroups(nil) }, filter: "Groups:", expect: " "},
+ {call: "Setgroups([]int{0})", fn: func() error { return syscall.Setgroups([]int{0}) }, filter: "Groups:", expect: "0 "},
+
+ {call: "Setregid(101,0)", fn: func() error { return syscall.Setregid(101, 0) }, filter: "Gid:", expect: "101\t0\t0\t0"},
+ {call: "Setregid(0,102)", fn: func() error { return syscall.Setregid(0, 102) }, filter: "Gid:", expect: "0\t102\t102\t102"},
+ {call: "Setregid(0,0)", fn: func() error { return syscall.Setregid(0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+
+ {call: "Setreuid(1,0)", fn: func() error { return syscall.Setreuid(1, 0) }, filter: "Uid:", expect: "1\t0\t0\t0"},
+ {call: "Setreuid(0,2)", fn: func() error { return syscall.Setreuid(0, 2) }, filter: "Uid:", expect: "0\t2\t2\t2"},
+ {call: "Setreuid(0,0)", fn: func() error { return syscall.Setreuid(0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+
+ {call: "Setresgid(101,0,102)", fn: func() error { return syscall.Setresgid(101, 0, 102) }, filter: "Gid:", expect: "101\t0\t102\t0"},
+ {call: "Setresgid(0,102,101)", fn: func() error { return syscall.Setresgid(0, 102, 101) }, filter: "Gid:", expect: "0\t102\t101\t102"},
+ {call: "Setresgid(0,0,0)", fn: func() error { return syscall.Setresgid(0, 0, 0) }, filter: "Gid:", expect: "0\t0\t0\t0"},
+
+ {call: "Setresuid(1,0,2)", fn: func() error { return syscall.Setresuid(1, 0, 2) }, filter: "Uid:", expect: "1\t0\t2\t0"},
+ {call: "Setresuid(0,2,1)", fn: func() error { return syscall.Setresuid(0, 2, 1) }, filter: "Uid:", expect: "0\t2\t1\t2"},
+ {call: "Setresuid(0,0,0)", fn: func() error { return syscall.Setresuid(0, 0, 0) }, filter: "Uid:", expect: "0\t0\t0\t0"},
+ }
+
+ for i, v := range vs {
+ if err := v.fn(); err != nil {
+ t.Errorf("[%d] %q failed: %v", i, v.call, err)
+ continue
+ }
+ if err := compareStatus(v.filter, v.expect); err != nil {
+ t.Errorf("[%d] %q comparison: %v", i, v.call, err)
+ }
+ }
+}
diff --git a/misc/cgo/test/test.go b/misc/cgo/test/test.go
index a78f88499b2..65823b1ca0e 100644
--- a/misc/cgo/test/test.go
+++ b/misc/cgo/test/test.go
@@ -1000,6 +1000,32 @@ func testEnum(t *testing.T) {
}
}
+func testNamedEnum(t *testing.T) {
+ e := new(C.enum_E)
+
+ *e = C.Enum1
+ if *e != 1 {
+ t.Error("bad enum", C.Enum1)
+ }
+
+ *e = C.Enum2
+ if *e != 2 {
+ t.Error("bad enum", C.Enum2)
+ }
+}
+
+func testCastToEnum(t *testing.T) {
+ e := C.enum_E(C.Enum1)
+ if e != 1 {
+ t.Error("bad enum", C.Enum1)
+ }
+
+ e = C.enum_E(C.Enum2)
+ if e != 2 {
+ t.Error("bad enum", C.Enum2)
+ }
+}
+
func testAtol(t *testing.T) {
l := Atol("123")
if l != 123 {
diff --git a/misc/cgo/test/testdata/issue9400/asm_riscv64.s b/misc/cgo/test/testdata/issue9400/asm_riscv64.s
new file mode 100644
index 00000000000..20fcc0066d6
--- /dev/null
+++ b/misc/cgo/test/testdata/issue9400/asm_riscv64.s
@@ -0,0 +1,31 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build riscv64
+// +build !gccgo
+
+#include "textflag.h"
+
+TEXT ·RewindAndSetgid(SB),NOSPLIT|NOFRAME,$0-0
+ // Rewind stack pointer so anything that happens on the stack
+ // will clobber the test pattern created by the caller
+ ADD $(1024*8), X2
+
+ // Ask signaller to setgid
+ MOV $1, X5
+ FENCE
+ MOVW X5, ·Baton(SB)
+ FENCE
+
+ // Wait for setgid completion
+loop:
+ FENCE
+ MOVW ·Baton(SB), X5
+ OR X6, X6, X6 // hint that we're in a spin loop
+ BNE ZERO, X5, loop
+ FENCE
+
+ // Restore stack
+ ADD $(-1024*8), X2
+ RET
diff --git a/misc/cgo/testcshared/cshared_test.go b/misc/cgo/testcshared/cshared_test.go
index d557f34b0ff..3a4886cf30a 100644
--- a/misc/cgo/testcshared/cshared_test.go
+++ b/misc/cgo/testcshared/cshared_test.go
@@ -7,6 +7,8 @@ package cshared_test
import (
"bytes"
"debug/elf"
+ "debug/pe"
+ "encoding/binary"
"flag"
"fmt"
"io/ioutil"
@@ -355,6 +357,101 @@ func TestExportedSymbols(t *testing.T) {
}
}
+func checkNumberOfExportedFunctionsWindows(t *testing.T, exportAllSymbols bool) {
+ const prog = `
+package main
+
+import "C"
+
+//export GoFunc
+func GoFunc() {
+ println(42)
+}
+
+//export GoFunc2
+func GoFunc2() {
+ println(24)
+}
+
+func main() {
+}
+`
+
+ tmpdir := t.TempDir()
+
+ srcfile := filepath.Join(tmpdir, "test.go")
+ objfile := filepath.Join(tmpdir, "test.dll")
+ if err := ioutil.WriteFile(srcfile, []byte(prog), 0666); err != nil {
+ t.Fatal(err)
+ }
+ argv := []string{"build", "-buildmode=c-shared"}
+ if exportAllSymbols {
+ argv = append(argv, "-ldflags", "-extldflags=-Wl,--export-all-symbols")
+ }
+ argv = append(argv, "-o", objfile, srcfile)
+ out, err := exec.Command("go", argv...).CombinedOutput()
+ if err != nil {
+ t.Fatalf("build failure: %s\n%s\n", err, string(out))
+ }
+
+ f, err := pe.Open(objfile)
+ if err != nil {
+ t.Fatalf("pe.Open failed: %v", err)
+ }
+ defer f.Close()
+ section := f.Section(".edata")
+ if section == nil {
+ t.Fatalf(".edata section is not present")
+ }
+
+ // TODO: deduplicate this struct from cmd/link/internal/ld/pe.go
+ type IMAGE_EXPORT_DIRECTORY struct {
+ _ [2]uint32
+ _ [2]uint16
+ _ [2]uint32
+ NumberOfFunctions uint32
+ NumberOfNames uint32
+ _ [3]uint32
+ }
+ var e IMAGE_EXPORT_DIRECTORY
+ if err := binary.Read(section.Open(), binary.LittleEndian, &e); err != nil {
+ t.Fatalf("binary.Read failed: %v", err)
+ }
+
+ // Only the two exported functions and _cgo_dummy_export should be exported
+ expectedNumber := uint32(3)
+
+ if exportAllSymbols {
+ if e.NumberOfFunctions <= expectedNumber {
+ t.Fatalf("missing exported functions: %v", e.NumberOfFunctions)
+ }
+ if e.NumberOfNames <= expectedNumber {
+ t.Fatalf("missing exported names: %v", e.NumberOfNames)
+ }
+ } else {
+ if e.NumberOfFunctions != expectedNumber {
+ t.Fatalf("got %d exported functions; want %d", e.NumberOfFunctions, expectedNumber)
+ }
+ if e.NumberOfNames != expectedNumber {
+ t.Fatalf("got %d exported names; want %d", e.NumberOfNames, expectedNumber)
+ }
+ }
+}
+
+func TestNumberOfExportedFunctions(t *testing.T) {
+ if GOOS != "windows" {
+ t.Skip("skipping windows only test")
+ }
+ t.Parallel()
+
+ t.Run("OnlyExported", func(t *testing.T) {
+ checkNumberOfExportedFunctionsWindows(t, false)
+ })
+ t.Run("All", func(t *testing.T) {
+ checkNumberOfExportedFunctionsWindows(t, true)
+ })
+}
+
// test1: shared library can be dynamically loaded and exported symbols are accessible.
func TestExportedSymbolsWithDynamicLoad(t *testing.T) {
t.Parallel()
diff --git a/misc/wasm/wasm_exec.js b/misc/wasm/wasm_exec.js
index 3ea03c45b7e..82041e6bb90 100644
--- a/misc/wasm/wasm_exec.js
+++ b/misc/wasm/wasm_exec.js
@@ -503,6 +503,9 @@
}
async run(instance) {
+ if (!(instance instanceof WebAssembly.Instance)) {
+ throw new Error("Go.run: WebAssembly.Instance expected");
+ }
this._inst = instance;
this.mem = new DataView(this._inst.exports.mem.buffer);
this._values = [ // JS values that Go currently has references to, indexed by reference id
diff --git a/src/cmd/asm/internal/arch/arch.go b/src/cmd/asm/internal/arch/arch.go
index 2e5d0ff9911..a62e55191e6 100644
--- a/src/cmd/asm/internal/arch/arch.go
+++ b/src/cmd/asm/internal/arch/arch.go
@@ -535,6 +535,9 @@ func archRISCV64() *Arch {
// Standard register names.
for i := riscv.REG_X0; i <= riscv.REG_X31; i++ {
+ if i == riscv.REG_G {
+ continue
+ }
name := fmt.Sprintf("X%d", i-riscv.REG_X0)
register[name] = int16(i)
}
@@ -571,7 +574,7 @@ func archRISCV64() *Arch {
register["S8"] = riscv.REG_S8
register["S9"] = riscv.REG_S9
register["S10"] = riscv.REG_S10
- register["S11"] = riscv.REG_S11
+ // Skip S11 as it is the g register.
register["T3"] = riscv.REG_T3
register["T4"] = riscv.REG_T4
register["T5"] = riscv.REG_T5
diff --git a/src/cmd/asm/internal/arch/arm64.go b/src/cmd/asm/internal/arch/arm64.go
index e643889aef8..e557630ca60 100644
--- a/src/cmd/asm/internal/arch/arm64.go
+++ b/src/cmd/asm/internal/arch/arm64.go
@@ -75,7 +75,7 @@ func IsARM64STLXR(op obj.As) bool {
arm64.ASTXP, arm64.ASTXPW, arm64.ASTLXP, arm64.ASTLXPW:
return true
}
- // atomic instructions
+ // LDADDx/SWPx/CASx atomic instructions
if arm64.IsAtomicInstruction(op) {
return true
}
@@ -93,6 +93,17 @@ func IsARM64TBL(op obj.As) bool {
return false
}
+// IsARM64CASP reports whether the op (as defined by an arm64.A*
+// constant) is one of the CASP-like instructions, whose second
+// destination is a register pair that requires special handling.
+func IsARM64CASP(op obj.As) bool {
+ switch op {
+ case arm64.ACASPD, arm64.ACASPW:
+ return true
+ }
+ return false
+}
+
// ARM64Suffix handles the special suffix for the ARM64.
// It returns a boolean to indicate success; failure means
// cond was unrecognized.
diff --git a/src/cmd/asm/internal/asm/asm.go b/src/cmd/asm/internal/asm/asm.go
index b9efa454edd..c4032759bb5 100644
--- a/src/cmd/asm/internal/asm/asm.go
+++ b/src/cmd/asm/internal/asm/asm.go
@@ -637,6 +637,18 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
prog.From = a[0]
prog.SetFrom3(a[1])
prog.To = a[2]
+ case arch.IsARM64CASP(op):
+ prog.From = a[0]
+ prog.To = a[1]
+ // Both the 1st and 3rd operands are (Rs, Rs+1) register pairs,
+ // and each pair must be contiguous.
+ if (a[0].Type != obj.TYPE_REGREG) || (a[2].Type != obj.TYPE_REGREG) {
+ p.errorf("invalid addressing modes for 1st or 3rd operand to %s instruction, must be register pair", op)
+ return
+ }
+ // For ARM64 CASP-like instructions, the 2nd destination operand is a register pair (Rt, Rt+1) that
+ // cannot fit into prog.RegTo2, so save it in prog.RestArgs.
+ prog.SetTo2(a[2])
default:
prog.From = a[0]
prog.Reg = p.getRegister(prog, op, &a[1])
@@ -725,7 +737,7 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
}
if p.arch.Family == sys.AMD64 {
prog.From = a[0]
- prog.RestArgs = []obj.Addr{a[1], a[2]}
+ prog.SetRestArgs([]obj.Addr{a[1], a[2]})
prog.To = a[3]
break
}
@@ -808,13 +820,13 @@ func (p *Parser) asmInstruction(op obj.As, cond string, a []obj.Addr) {
}
if p.arch.Family == sys.AMD64 {
prog.From = a[0]
- prog.RestArgs = []obj.Addr{a[1], a[2], a[3]}
+ prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]})
prog.To = a[4]
break
}
if p.arch.Family == sys.S390X {
prog.From = a[0]
- prog.RestArgs = []obj.Addr{a[1], a[2], a[3]}
+ prog.SetRestArgs([]obj.Addr{a[1], a[2], a[3]})
prog.To = a[4]
break
}
diff --git a/src/cmd/asm/internal/asm/endtoend_test.go b/src/cmd/asm/internal/asm/endtoend_test.go
index 989b7a54054..7472507caf1 100644
--- a/src/cmd/asm/internal/asm/endtoend_test.go
+++ b/src/cmd/asm/internal/asm/endtoend_test.go
@@ -390,12 +390,7 @@ func TestARM64Errors(t *testing.T) {
}
func TestAMD64EndToEnd(t *testing.T) {
- defer func(old string) { objabi.GOAMD64 = old }(objabi.GOAMD64)
- for _, goamd64 := range []string{"normaljumps", "alignedjumps"} {
- t.Logf("GOAMD64=%s", goamd64)
- objabi.GOAMD64 = goamd64
- testEndToEnd(t, "amd64", "amd64")
- }
+ testEndToEnd(t, "amd64", "amd64")
}
func Test386Encoder(t *testing.T) {
diff --git a/src/cmd/asm/internal/asm/testdata/arm64.s b/src/cmd/asm/internal/asm/testdata/arm64.s
index 7f495b90bb3..5547cf634cc 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64.s
@@ -10,14 +10,8 @@
TEXT foo(SB), DUPOK|NOSPLIT, $-8
-//
-// ADD
-//
-// LTYPE1 imsr ',' spreg ',' reg
-// {
-// outcode($1, &$2, $4, &$6);
-// }
-// imsr comes from the old 7a, we only support immediates and registers
+
+// arithmetic operations
ADDW $1, R2, R3
ADDW R1, R2, R3
ADDW R1, ZR, R3
@@ -25,18 +19,29 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
ADD R1, R2, R3
ADD R1, ZR, R3
ADD $1, R2, R3
- ADD $0x000aaa, R2, R3 // ADD $2730, R2, R3 // 43a82a91
- ADD $0x000aaa, R2 // ADD $2730, R2 // 42a82a91
- ADD $0xaaa000, R2, R3 // ADD $11182080, R2, R3 // 43a86a91
- ADD $0xaaa000, R2 // ADD $11182080, R2 // 42a86a91
- ADD $0xaaaaaa, R2, R3 // ADD $11184810, R2, R3 // 43a82a9163a86a91
- ADD $0xaaaaaa, R2 // ADD $11184810, R2 // 42a82a9142a86a91
- SUB $0x000aaa, R2, R3 // SUB $2730, R2, R3 // 43a82ad1
- SUB $0x000aaa, R2 // SUB $2730, R2 // 42a82ad1
- SUB $0xaaa000, R2, R3 // SUB $11182080, R2, R3 // 43a86ad1
- SUB $0xaaa000, R2 // SUB $11182080, R2 // 42a86ad1
- SUB $0xaaaaaa, R2, R3 // SUB $11184810, R2, R3 // 43a82ad163a86ad1
- SUB $0xaaaaaa, R2 // SUB $11184810, R2 // 42a82ad142a86ad1
+ ADDW $1, R2
+ ADDW R1, R2
+ ADD $1, R2
+ ADD R1, R2
+ ADD R1>>11, R2
+ ADD R1<<22, R2
+ ADD R1->33, R2
+ ADD $0x000aaa, R2, R3 // ADD $2730, R2, R3 // 43a82a91
+ ADD $0x000aaa, R2 // ADD $2730, R2 // 42a82a91
+ ADD $0xaaa000, R2, R3 // ADD $11182080, R2, R3 // 43a86a91
+ ADD $0xaaa000, R2 // ADD $11182080, R2 // 42a86a91
+ ADD $0xaaaaaa, R2, R3 // ADD $11184810, R2, R3 // 43a82a9163a86a91
+ ADD $0xaaaaaa, R2 // ADD $11184810, R2 // 42a82a9142a86a91
+ SUB $0x000aaa, R2, R3 // SUB $2730, R2, R3 // 43a82ad1
+ SUB $0x000aaa, R2 // SUB $2730, R2 // 42a82ad1
+ SUB $0xaaa000, R2, R3 // SUB $11182080, R2, R3 // 43a86ad1
+ SUB $0xaaa000, R2 // SUB $11182080, R2 // 42a86ad1
+ SUB $0xaaaaaa, R2, R3 // SUB $11184810, R2, R3 // 43a82ad163a86ad1
+ SUB $0xaaaaaa, R2 // SUB $11184810, R2 // 42a82ad142a86ad1
+ ADDW $0x60060, R2 // ADDW $393312, R2 // 4280011142804111
+ ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091
+ SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb
+ ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
ADD R1>>11, R2, R3
ADD R1<<22, R2, R3
ADD R1->33, R2, R3
@@ -59,6 +64,30 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
CMN R1.SXTX<<2, R10 // 5fe921ab
CMPW R2.UXTH<<3, R11 // 7f2d226b
CMNW R1.SXTB, R9 // 3f81212b
+ CMPW $0x60060, R2 // CMPW $393312, R2 // 1b0c8052db00a0725f001b6b
+ CMPW $40960, R0 // 1f284071
+ CMPW $27745, R2 // 3b8c8d525f001b6b
+ CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
+ CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
+ CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb
+ CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb
+ CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb
+ CMP $3, R2
+ CMP R1, R2
+ CMP R1->11, R2
+ CMP R1>>22, R2
+ CMP R1<<33, R2
+ CMP R22.SXTX, RSP // ffe336eb
+ CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb
+ CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b
+ CCMN MI, ZR, R1, $4 // e44341ba
+ // MADD Rn,Rm,Ra,Rd
+ MADD R1, R2, R3, R4 // 6408019b
+ // CLS
+ CLSW R1, R2
+ CLS R1, R2
+
+// fp/simd instructions.
VADDP V1.B16, V2.B16, V3.B16 // 43bc214e
VADDP V1.S4, V2.S4, V3.S4 // 43bca14e
VADDP V1.D2, V2.D2, V3.D2 // 43bce14e
@@ -67,22 +96,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VORR V5.B16, V4.B16, V3.B16 // 831ca54e
VADD V16.S4, V5.S4, V9.S4 // a984b04e
VEOR V0.B16, V1.B16, V0.B16 // 201c206e
- SHA256H V9.S4, V3, V2 // 6240095e
- SHA256H2 V9.S4, V4, V3 // 8350095e
- SHA256SU0 V8.S4, V7.S4 // 0729285e
- SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e
- SHA1SU0 V11.S4, V8.S4, V6.S4 // 06310b5e
- SHA1SU1 V5.S4, V1.S4 // a118285e
- SHA1C V1.S4, V2, V3 // 4300015e
- SHA1H V5, V4 // a408285e
- SHA1M V8.S4, V7, V6 // e620085e
- SHA1P V11.S4, V10, V9 // 49110b5e
- SHA512H V2.D2, V1, V0 // 208062ce
- SHA512H2 V4.D2, V3, V2 // 628464ce
- SHA512SU0 V9.D2, V8.D2 // 2881c0ce
- SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce
- VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace
- VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce
VADDV V0.S4, V0 // 00b8b14e
VMOVI $82, V0.B16 // 40e6024f
VUADDLV V6.B16, V6 // c638306e
@@ -96,10 +109,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VFMLS V1.D2, V12.D2, V1.D2 // 81cde14e
VFMLS V1.S2, V12.S2, V1.S2 // 81cda10e
VFMLS V1.S4, V12.S4, V1.S4 // 81cda14e
- VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e
- VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e
- VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e
- VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e
VEXT $4, V2.B8, V1.B8, V3.B8 // 2320022e
VEXT $8, V2.B16, V1.B16, V3.B16 // 2340026e
VRBIT V24.B16, V24.B16 // 185b606e
@@ -125,6 +134,14 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VSRI $8, V1.H8, V2.H8 // 2244186f
VSRI $2, V1.B8, V2.B8 // 22440e2f
VSRI $2, V1.B16, V2.B16 // 22440e6f
+ VSLI $7, V2.B16, V3.B16 // 43540f6f
+ VSLI $15, V3.H4, V4.H4 // 64541f2f
+ VSLI $31, V5.S4, V6.S4 // a6543f6f
+ VSLI $63, V7.D2, V8.D2 // e8547f6f
+ VUSRA $8, V2.B16, V3.B16 // 4314086f
+ VUSRA $16, V3.H4, V4.H4 // 6414102f
+ VUSRA $32, V5.S4, V6.S4 // a614206f
+ VUSRA $64, V7.D2, V8.D2 // e814406f
VTBL V22.B16, [V28.B16, V29.B16], V11.B16 // 8b23164e
VTBL V18.B8, [V17.B16, V18.B16, V19.B16], V22.B8 // 3642120e
VTBL V31.B8, [V14.B16, V15.B16, V16.B16, V17.B16], V15.B8 // cf611f0e
@@ -141,8 +158,6 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VTBL V14.B16, [V3.B16, V4.B16, V5.B16], V17.B16 // 71400e4e
VTBL V13.B16, [V29.B16, V30.B16, V31.B16, V0.B16], V28.B16 // bc630d4e
VTBL V3.B8, [V27.B16], V8.B8 // 6803030e
- VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce
- VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce
VZIP1 V16.H8, V3.H8, V19.H8 // 7338504e
VZIP2 V22.D2, V25.D2, V21.D2 // 357bd64e
VZIP1 V6.D2, V9.D2, V11.D2 // 2b39c64e
@@ -180,114 +195,95 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VUSHLL2 $31, V30.S4, V2.D2 // c2a73f6f
VBIF V0.B8, V30.B8, V1.B8 // c11fe02e
VBIF V30.B16, V0.B16, V2.B16 // 021cfe6e
- MOVD (R2)(R6.SXTW), R4 // 44c866f8
- MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
- MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8
- MOVWU (R19)(R20<<2), R20 // 747a74b8
- MOVD (R2)(R6<<3), R4 // 447866f8
- MOVD (R3)(R7.SXTX<<3), R8 // 68f867f8
- MOVWU (R5)(R4.UXTW), R10 // aa4864b8
- MOVBU (R3)(R9.UXTW), R8 // 68486938
- MOVBU (R5)(R8), R10 // MOVBU (R5)(R8*1), R10 // aa686838
- MOVHU (R2)(R7.SXTW<<1), R11 // 4bd86778
- MOVHU (R1)(R2<<1), R5 // 25786278
- MOVB (R9)(R3.UXTW), R6 // 2649a338
- MOVB (R10)(R6), R15 // MOVB (R10)(R6*1), R15 // 4f69a638
- MOVH (R5)(R7.SXTX<<1), R19 // b3f8a778
- MOVH (R8)(R4<<1), R10 // 0a79a478
- MOVW (R9)(R8.SXTW<<2), R19 // 33d9a8b8
- MOVW (R1)(R4.SXTX), R11 // 2be8a4b8
- MOVW (R1)(R4.SXTX), ZR // 3fe8a4b8
- MOVW (R2)(R5), R12 // MOVW (R2)(R5*1), R12 // 4c68a5b8
- MOVD R5, (R2)(R6<<3) // 457826f8
- MOVD R9, (R6)(R7.SXTX<<3) // c9f827f8
- MOVD ZR, (R6)(R7.SXTX<<3) // dff827f8
- MOVW R8, (R2)(R3.UXTW<<2) // 485823b8
- MOVW R7, (R3)(R4.SXTW) // 67c824b8
- MOVB R4, (R2)(R6.SXTX) // 44e82638
- MOVB R8, (R3)(R9.UXTW) // 68482938
- MOVB R10, (R5)(R8) // MOVB R10, (R5)(R8*1) // aa682838
- MOVH R11, (R2)(R7.SXTW<<1) // 4bd82778
- MOVH R5, (R1)(R2<<1) // 25782278
- MOVH R7, (R2)(R5.SXTX<<1) // 47f82578
- MOVH R8, (R3)(R6.UXTW) // 68482678
- MOVB (R29)(R30<<0), R14 // ae7bbe38
- MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38
- MOVB R4, (R2)(R6.SXTX) // 44e82638
FMOVS $(4.0), F0 // 0010221e
FMOVD $(4.0), F0 // 0010621e
FMOVS $(0.265625), F1 // 01302a1e
FMOVD $(0.1796875), F2 // 02f0681e
FMOVS $(0.96875), F3 // 03f02d1e
FMOVD $(28.0), F4 // 0490671e
+ VUADDW V9.B8, V12.H8, V14.H8 // 8e11292e
+ VUADDW V13.H4, V10.S4, V11.S4 // 4b116d2e
+ VUADDW V21.S2, V24.D2, V29.D2 // 1d13b52e
+ VUADDW2 V9.B16, V12.H8, V14.H8 // 8e11296e
+ VUADDW2 V13.H8, V20.S4, V30.S4 // 9e126d6e
+ VUADDW2 V21.S4, V24.D2, V29.D2 // 1d13b56e
+ FCCMPS LT, F1, F2, $1 // 41b4211e
+ FMADDS F1, F3, F2, F4 // 440c011f
+ FMADDD F4, F5, F4, F4 // 8414441f
+ FMSUBS F13, F21, F13, F19 // b3d50d1f
+ FMSUBD F11, F7, F15, F31 // ff9d4b1f
+ FNMADDS F1, F3, F2, F4 // 440c211f
+ FNMADDD F1, F3, F2, F4 // 440c611f
+ FNMSUBS F1, F3, F2, F4 // 448c211f
+ FNMSUBD F1, F3, F2, F4 // 448c611f
+ FADDS F2, F3, F4 // 6428221e
+ FADDD F1, F2 // 4228611e
+ VDUP V19.S[0], V17.S4 // 7106044e
-// move a large constant to a Vd.
- VMOVS $0x80402010, V11 // VMOVS $2151686160, V11
- VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20
- VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10
- VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20
- FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
- FMOVS (R2)(R6<<2), F4 // 447866bc
- FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc
- FMOVD (R2)(R6<<3), F4 // 447866fc
- FMOVS F4, (R2)(R6) // FMOVS F4, (R2)(R6*1) // 446826bc
- FMOVS F4, (R2)(R6<<2) // 447826bc
- FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc
- FMOVD F4, (R2)(R6<<3) // 447826fc
+// special
+ PRFM (R2), PLDL1KEEP // 400080f9
+ PRFM 16(R2), PLDL1KEEP // 400880f9
+ PRFM 48(R6), PSTL2STRM // d31880f9
+ PRFM 8(R12), PLIL3STRM // 8d0580f9
+ PRFM (R8), $25 // 190180f9
+ PRFM 8(R9), $30 // 3e0580f9
+ NOOP // 1f2003d5
+ HINT $0 // 1f2003d5
+ DMB $1
+ SVC
- CMPW $40960, R0 // 1f284071
- CMPW $27745, R2 // 3b8c8d525f001b6b
- CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
- CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
- CMP $0xffffffffffa0, R3 // CMP $281474976710560, R3 // fb0b80921b00e0f27f001beb
- CMP $0xf4240, R1 // CMP $1000000, R1 // 1b4888d2fb01a0f23f001beb
- ADD $0x186a0, R2, R5 // ADD $100000, R2, R5 // 45801a91a5604091
- SUB $0xe7791f700, R3, R1 // SUB $62135596800, R3, R1 // 1be09ed23bf2aef2db01c0f261001bcb
- CMP $3343198598084851058, R3 // 5bae8ed2db8daef23badcdf2bbcce5f27f001beb
- ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
-// LTYPE1 imsr ',' spreg ','
-// {
-// outcode($1, &$2, $4, &nullgen);
-// }
-// LTYPE1 imsr ',' reg
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
- ADDW $1, R2
- ADDW R1, R2
- ADD $1, R2
- ADD R1, R2
- ADD R1>>11, R2
- ADD R1<<22, R2
- ADD R1->33, R2
- AND R1@>33, R2
+// encryption
+ SHA256H V9.S4, V3, V2 // 6240095e
+ SHA256H2 V9.S4, V4, V3 // 8350095e
+ SHA256SU0 V8.S4, V7.S4 // 0729285e
+ SHA256SU1 V6.S4, V5.S4, V7.S4 // a760065e
+ SHA1SU0 V11.S4, V8.S4, V6.S4 // 06310b5e
+ SHA1SU1 V5.S4, V1.S4 // a118285e
+ SHA1C V1.S4, V2, V3 // 4300015e
+ SHA1H V5, V4 // a408285e
+ SHA1M V8.S4, V7, V6 // e620085e
+ SHA1P V11.S4, V10, V9 // 49110b5e
+ SHA512H V2.D2, V1, V0 // 208062ce
+ SHA512H2 V4.D2, V3, V2 // 628464ce
+ SHA512SU0 V9.D2, V8.D2 // 2881c0ce
+ SHA512SU1 V7.D2, V6.D2, V5.D2 // c58867ce
+ VRAX1 V26.D2, V29.D2, V30.D2 // be8f7ace
+ VXAR $63, V27.D2, V21.D2, V26.D2 // bafe9bce
+ VPMULL V2.D1, V1.D1, V3.Q1 // 23e0e20e
+ VPMULL2 V2.D2, V1.D2, V4.Q1 // 24e0e24e
+ VPMULL V2.B8, V1.B8, V3.H8 // 23e0220e
+ VPMULL2 V2.B16, V1.B16, V4.H8 // 24e0224e
+ VEOR3 V2.B16, V7.B16, V12.B16, V25.B16 // 990907ce
+ VBCAX V1.B16, V2.B16, V26.B16, V31.B16 // 5f0722ce
+ VREV32 V5.B16, V5.B16 // a508206e
+ VREV64 V2.S2, V3.S2 // 4308a00e
+ VREV64 V2.S4, V3.S4 // 4308a04e
// logical ops
+//
// make sure constants get encoded into an instruction when it could
- AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192
- AND $(1<<63-1), R1 // AND $9223372036854775807, R1 // 21f84092
- ORR $(1<<63), R1 // ORR $-9223372036854775808, R1 // 210041b2
- ORR $(1<<63-1), R1 // ORR $9223372036854775807, R1 // 21f840b2
- EOR $(1<<63), R1 // EOR $-9223372036854775808, R1 // 210041d2
- EOR $(1<<63-1), R1 // EOR $9223372036854775807, R1 // 21f840d2
-
- ANDW $0x3ff00000, R2 // ANDW $1072693248, R2 // 42240c12
- BICW $0x3ff00000, R2 // BICW $1072693248, R2 // 42540212
- ORRW $0x3ff00000, R2 // ORRW $1072693248, R2 // 42240c32
- ORNW $0x3ff00000, R2 // ORNW $1072693248, R2 // 42540232
- EORW $0x3ff00000, R2 // EORW $1072693248, R2 // 42240c52
- EONW $0x3ff00000, R2 // EONW $1072693248, R2 // 42540252
-
- AND $0x22220000, R3, R4 // AND $572653568, R3, R4 // 5b44a4d264001b8a
- ORR $0x22220000, R3, R4 // ORR $572653568, R3, R4 // 5b44a4d264001baa
- EOR $0x22220000, R3, R4 // EOR $572653568, R3, R4 // 5b44a4d264001bca
- BIC $0x22220000, R3, R4 // BIC $572653568, R3, R4 // 5b44a4d264003b8a
- ORN $0x22220000, R3, R4 // ORN $572653568, R3, R4 // 5b44a4d264003baa
- EON $0x22220000, R3, R4 // EON $572653568, R3, R4 // 5b44a4d264003bca
- ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea
- BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea
-
+ AND R1@>33, R2
+ AND $(1<<63), R1 // AND $-9223372036854775808, R1 // 21004192
+ AND $(1<<63-1), R1 // AND $9223372036854775807, R1 // 21f84092
+ ORR $(1<<63), R1 // ORR $-9223372036854775808, R1 // 210041b2
+ ORR $(1<<63-1), R1 // ORR $9223372036854775807, R1 // 21f840b2
+ EOR $(1<<63), R1 // EOR $-9223372036854775808, R1 // 210041d2
+ EOR $(1<<63-1), R1 // EOR $9223372036854775807, R1 // 21f840d2
+ ANDW $0x3ff00000, R2 // ANDW $1072693248, R2 // 42240c12
+ BICW $0x3ff00000, R2 // BICW $1072693248, R2 // 42540212
+ ORRW $0x3ff00000, R2 // ORRW $1072693248, R2 // 42240c32
+ ORNW $0x3ff00000, R2 // ORNW $1072693248, R2 // 42540232
+ EORW $0x3ff00000, R2 // EORW $1072693248, R2 // 42240c52
+ EONW $0x3ff00000, R2 // EONW $1072693248, R2 // 42540252
+ AND $0x22220000, R3, R4 // AND $572653568, R3, R4 // 5b44a4d264001b8a
+ ORR $0x22220000, R3, R4 // ORR $572653568, R3, R4 // 5b44a4d264001baa
+ EOR $0x22220000, R3, R4 // EOR $572653568, R3, R4 // 5b44a4d264001bca
+ BIC $0x22220000, R3, R4 // BIC $572653568, R3, R4 // 5b44a4d264003b8a
+ ORN $0x22220000, R3, R4 // ORN $572653568, R3, R4 // 5b44a4d264003baa
+ EON $0x22220000, R3, R4 // EON $572653568, R3, R4 // 5b44a4d264003bca
+ ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea
+ BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea
EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2
TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72
TST $0x4900000049, R0 // TST $313532612681, R0 // 3b0980d23b09c0f21f001bea
@@ -316,19 +312,22 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
EONW $0x6006000060060, R5 // EONW $1689262177517664, R5 // 1b0c8052db00a072a5003b4a
ORNW $0x6006000060060, R5 // ORNW $1689262177517664, R5 // 1b0c8052db00a072a5003b2a
BICSW $0x6006000060060, R5 // BICSW $1689262177517664, R5 // 1b0c8052db00a072a5003b6a
- ADDW $0x60060, R2 // ADDW $393312, R2 // 4280011142804111
- CMPW $0x60060, R2 // CMPW $393312, R2 // 1b0c8052db00a0725f001b6b
-
// TODO: this could have better encoding
- ANDW $-1, R10 // 1b0080124a011b0a
-
- AND $8, R0, RSP // 1f007d92
- ORR $8, R0, RSP // 1f007db2
- EOR $8, R0, RSP // 1f007dd2
- BIC $8, R0, RSP // 1ff87c92
- ORN $8, R0, RSP // 1ff87cb2
- EON $8, R0, RSP // 1ff87cd2
+ ANDW $-1, R10 // 1b0080124a011b0a
+ AND $8, R0, RSP // 1f007d92
+ ORR $8, R0, RSP // 1f007db2
+ EOR $8, R0, RSP // 1f007dd2
+ BIC $8, R0, RSP // 1ff87c92
+ ORN $8, R0, RSP // 1ff87cb2
+ EON $8, R0, RSP // 1ff87cd2
+ TST $15, R2 // 5f0c40f2
+ TST R1, R2 // 5f0001ea
+ TST R1->11, R2 // 5f2c81ea
+ TST R1>>22, R2 // 5f5841ea
+ TST R1<<33, R2 // 5f8401ea
+ TST $0x22220000, R3 // TST $572653568, R3 // 5b44a4d27f001bea
+// move an immediate to a Rn.
MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2
MOVW $1000000, R4 // 04488852e401a072
MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552
@@ -348,46 +347,37 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
MOVD $-1, R1 // 01008092
MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2
MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92
+ MOVW $1, ZR
+ MOVW $1, R1
+ MOVD $1, ZR
+ MOVD $1, R1
+ MOVK $1, R1
+// move a large constant to a Vd.
+ VMOVS $0x80402010, V11 // VMOVS $2151686160, V11
+ VMOVD $0x8040201008040201, V20 // VMOVD $-9205322385119247871, V20
+ VMOVQ $0x7040201008040201, $0x8040201008040201, V10 // VMOVQ $8088500183983456769, $-9205322385119247871, V10
+ VMOVQ $0x8040201008040202, $0x7040201008040201, V20 // VMOVQ $-9205322385119247870, $8088500183983456769, V20
+
+// mov(to/from sp)
MOVD $0x1002(RSP), R1 // MOVD $4098(RSP), R1 // fb074091610b0091
MOVD $0x1708(RSP), RSP // MOVD $5896(RSP), RSP // fb0740917f231c91
MOVD $0x2001(R7), R1 // MOVD $8193(R7), R1 // fb08409161070091
MOVD $0xffffff(R7), R1 // MOVD $16777215(R7), R1 // fbfc7f9161ff3f91
-
MOVD $-0x1(R7), R1 // MOVD $-1(R7), R1 // e10400d1
MOVD $-0x30(R7), R1 // MOVD $-48(R7), R1 // e1c000d1
MOVD $-0x708(R7), R1 // MOVD $-1800(R7), R1 // e1201cd1
MOVD $-0x2000(RSP), R1 // MOVD $-8192(RSP), R1 // e10b40d1
MOVD $-0x10000(RSP), RSP // MOVD $-65536(RSP), RSP // ff4340d1
-
-//
-// CLS
-//
-// LTYPE2 imsr ',' reg
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
- CLSW R1, R2
- CLS R1, R2
-
-//
-// MOV
-//
-// LTYPE3 addr ',' addr
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
MOVW R1, R2
MOVW ZR, R1
MOVW R1, ZR
- MOVW $1, ZR
- MOVW $1, R1
- MOVW ZR, (R1)
MOVD R1, R2
MOVD ZR, R1
- MOVD $1, ZR
- MOVD $1, R1
- MOVD ZR, (R1)
+
+// store and load
+//
+// LD1/ST1
VLD1 (R8), [V1.B16, V2.B16] // 01a1404c
VLD1.P (R3), [V31.H8, V0.H8] // 7fa4df4c
VLD1.P (R8)(R20), [V21.B16, V22.B16] // VLD1.P (R8)(R20*1), [V21.B16,V22.B16] // 15a1d44c
@@ -445,34 +435,21 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VST4 [V22.D2, V23.D2, V24.D2, V25.D2], (R3) // 760c004c
VST4.P [V14.D2, V15.D2, V16.D2, V17.D2], 64(R15) // ee0d9f4c
VST4.P [V24.B8, V25.B8, V26.B8, V27.B8], (R3)(R23) // VST4.P [V24.B8, V25.B8, V26.B8, V27.B8], (R3)(R23*1) // 7800970c
- FMOVS F20, (R0) // 140000bd
+
+// pre/post-indexed
FMOVS.P F20, 4(R0) // 144400bc
FMOVS.W F20, 4(R0) // 144c00bc
- FMOVS (R0), F20 // 140040bd
+ FMOVD.P F20, 8(R1) // 348400fc
+ FMOVQ.P F13, 11(R10) // 4db5803c
+ FMOVQ.W F15, 11(R20) // 8fbe803c
+
FMOVS.P 8(R0), F20 // 148440bc
FMOVS.W 8(R0), F20 // 148c40bc
- FMOVD F20, (R2) // 540000fd
- FMOVD.P F20, 8(R1) // 348400fc
FMOVD.W 8(R1), F20 // 348c40fc
- PRFM (R2), PLDL1KEEP // 400080f9
- PRFM 16(R2), PLDL1KEEP // 400880f9
- PRFM 48(R6), PSTL2STRM // d31880f9
- PRFM 8(R12), PLIL3STRM // 8d0580f9
- PRFM (R8), $25 // 190180f9
- PRFM 8(R9), $30 // 3e0580f9
+ FMOVQ.P 11(R10), F13 // 4db5c03c
+ FMOVQ.W 11(R20), F15 // 8fbec03c
- // small offset fits into instructions
- MOVB 1(R1), R2 // 22048039
- MOVH 1(R1), R2 // 22108078
- MOVH 2(R1), R2 // 22048079
- MOVW 1(R1), R2 // 221080b8
- MOVW 4(R1), R2 // 220480b9
- MOVD 1(R1), R2 // 221040f8
- MOVD 8(R1), R2 // 220440f9
- FMOVS 1(R1), F2 // 221040bc
- FMOVS 4(R1), F2 // 220440bd
- FMOVD 1(R1), F2 // 221040fc
- FMOVD 8(R1), F2 // 220440fd
+// small offset fits into instructions
MOVB R1, 1(R2) // 41040039
MOVH R1, 1(R2) // 41100078
MOVH R1, 2(R2) // 41040079
@@ -480,18 +457,37 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
MOVW R1, 4(R2) // 410400b9
MOVD R1, 1(R2) // 411000f8
MOVD R1, 8(R2) // 410400f9
+ MOVD ZR, (R1)
+ MOVW ZR, (R1)
FMOVS F1, 1(R2) // 411000bc
FMOVS F1, 4(R2) // 410400bd
+ FMOVS F20, (R0) // 140000bd
FMOVD F1, 1(R2) // 411000fc
FMOVD F1, 8(R2) // 410400fd
+ FMOVD F20, (R2) // 540000fd
+ FMOVQ F0, 32(R5) // a008803d
+ FMOVQ F10, 65520(R10) // 4afdbf3d
+ FMOVQ F11, 64(RSP) // eb13803d
+ FMOVQ F11, 8(R20) // 8b82803c
+ FMOVQ F11, 4(R20) // 8b42803c
- // large aligned offset, use two instructions
- MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
- MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
- MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
- MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
- FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
- FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
+ MOVB 1(R1), R2 // 22048039
+ MOVH 1(R1), R2 // 22108078
+ MOVH 2(R1), R2 // 22048079
+ MOVW 1(R1), R2 // 221080b8
+ MOVW 4(R1), R2 // 220480b9
+ MOVD 1(R1), R2 // 221040f8
+ MOVD 8(R1), R2 // 220440f9
+ FMOVS (R0), F20 // 140040bd
+ FMOVS 1(R1), F2 // 221040bc
+ FMOVS 4(R1), F2 // 220440bd
+ FMOVD 1(R1), F2 // 221040fc
+ FMOVD 8(R1), F2 // 220440fd
+ FMOVQ 32(R5), F2 // a208c03d
+ FMOVQ 65520(R10), F10 // 4afdff3d
+ FMOVQ 64(RSP), F11 // eb13c03d
+
+// large aligned offset, use two instructions (add+ldr/store).
MOVB R1, 0x1001(R2) // MOVB R1, 4097(R2) // 5b04409161070039
MOVH R1, 0x2002(R2) // MOVH R1, 8194(R2) // 5b08409161070079
MOVW R1, 0x4004(R2) // MOVW R1, 16388(R2) // 5b104091610700b9
@@ -499,15 +495,16 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
FMOVS F1, 0x4004(R2) // FMOVS F1, 16388(R2) // 5b104091610700bd
FMOVD F1, 0x8008(R2) // FMOVD F1, 32776(R2) // 5b204091610700fd
- // very large or unaligned offset uses constant pool
- // the encoding cannot be checked as the address of the constant pool is unknown.
- // here we only test that they can be assembled.
- MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2
- MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2
- MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2
- MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2
- FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2
- FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2
+ MOVB 0x1001(R1), R2 // MOVB 4097(R1), R2 // 3b04409162078039
+ MOVH 0x2002(R1), R2 // MOVH 8194(R1), R2 // 3b08409162078079
+ MOVW 0x4004(R1), R2 // MOVW 16388(R1), R2 // 3b104091620780b9
+ MOVD 0x8008(R1), R2 // MOVD 32776(R1), R2 // 3b204091620740f9
+ FMOVS 0x4004(R1), F2 // FMOVS 16388(R1), F2 // 3b104091620740bd
+ FMOVD 0x8008(R1), F2 // FMOVD 32776(R1), F2 // 3b204091620740fd
+
+// very large or unaligned offset uses constant pool.
+// the encoding cannot be checked as the address of the constant pool is unknown.
+// here we only test that they can be assembled.
MOVB R1, 0x44332211(R2) // MOVB R1, 1144201745(R2)
MOVH R1, 0x44332211(R2) // MOVH R1, 1144201745(R2)
MOVW R1, 0x44332211(R2) // MOVW R1, 1144201745(R2)
@@ -515,14 +512,59 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
FMOVS F1, 0x44332211(R2) // FMOVS F1, 1144201745(R2)
FMOVD F1, 0x44332211(R2) // FMOVD F1, 1144201745(R2)
-//
-// MOVK
-//
-// LMOVK imm ',' reg
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
- MOVK $1, R1
+ MOVB 0x44332211(R1), R2 // MOVB 1144201745(R1), R2
+ MOVH 0x44332211(R1), R2 // MOVH 1144201745(R1), R2
+ MOVW 0x44332211(R1), R2 // MOVW 1144201745(R1), R2
+ MOVD 0x44332211(R1), R2 // MOVD 1144201745(R1), R2
+ FMOVS 0x44332211(R1), F2 // FMOVS 1144201745(R1), F2
+ FMOVD 0x44332211(R1), F2 // FMOVD 1144201745(R1), F2
+
+// shifted or extended register offset.
+ MOVD (R2)(R6.SXTW), R4 // 44c866f8
+ MOVD (R3)(R6), R5 // MOVD (R3)(R6*1), R5 // 656866f8
+ MOVD (R2)(R6), R4 // MOVD (R2)(R6*1), R4 // 446866f8
+ MOVWU (R19)(R20<<2), R20 // 747a74b8
+ MOVD (R2)(R6<<3), R4 // 447866f8
+ MOVD (R3)(R7.SXTX<<3), R8 // 68f867f8
+ MOVWU (R5)(R4.UXTW), R10 // aa4864b8
+ MOVBU (R3)(R9.UXTW), R8 // 68486938
+ MOVBU (R5)(R8), R10 // MOVBU (R5)(R8*1), R10 // aa686838
+ MOVHU (R2)(R7.SXTW<<1), R11 // 4bd86778
+ MOVHU (R1)(R2<<1), R5 // 25786278
+ MOVB (R9)(R3.UXTW), R6 // 2649a338
+ MOVB (R10)(R6), R15 // MOVB (R10)(R6*1), R15 // 4f69a638
+ MOVB (R29)(R30<<0), R14 // ae7bbe38
+ MOVB (R29)(R30), R14 // MOVB (R29)(R30*1), R14 // ae6bbe38
+ MOVH (R5)(R7.SXTX<<1), R19 // b3f8a778
+ MOVH (R8)(R4<<1), R10 // 0a79a478
+ MOVW (R9)(R8.SXTW<<2), R19 // 33d9a8b8
+ MOVW (R1)(R4.SXTX), R11 // 2be8a4b8
+ MOVW (R1)(R4.SXTX), ZR // 3fe8a4b8
+ MOVW (R2)(R5), R12 // MOVW (R2)(R5*1), R12 // 4c68a5b8
+ FMOVS (R2)(R6), F4 // FMOVS (R2)(R6*1), F4 // 446866bc
+ FMOVS (R2)(R6<<2), F4 // 447866bc
+ FMOVD (R2)(R6), F4 // FMOVD (R2)(R6*1), F4 // 446866fc
+ FMOVD (R2)(R6<<3), F4 // 447866fc
+
+ MOVD R5, (R2)(R6<<3) // 457826f8
+ MOVD R9, (R6)(R7.SXTX<<3) // c9f827f8
+ MOVD ZR, (R6)(R7.SXTX<<3) // dff827f8
+ MOVW R8, (R2)(R3.UXTW<<2) // 485823b8
+ MOVW R7, (R3)(R4.SXTW) // 67c824b8
+ MOVB R4, (R2)(R6.SXTX) // 44e82638
+ MOVB R8, (R3)(R9.UXTW) // 68482938
+ MOVB R10, (R5)(R8) // MOVB R10, (R5)(R8*1) // aa682838
+ MOVH R11, (R2)(R7.SXTW<<1) // 4bd82778
+ MOVH R5, (R1)(R2<<1) // 25782278
+ MOVH R7, (R2)(R5.SXTX<<1) // 47f82578
+ MOVH R8, (R3)(R6.UXTW) // 68482678
+ MOVB R4, (R2)(R6.SXTX) // 44e82638
+ FMOVS F4, (R2)(R6) // FMOVS F4, (R2)(R6*1) // 446826bc
+ FMOVS F4, (R2)(R6<<2) // 447826bc
+ FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc
+ FMOVD F4, (R2)(R6<<3) // 447826fc
+
+// vmov
VMOV V8.S[1], R1 // 013d0c0e
VMOV V0.D[0], R11 // 0b3c084e
VMOV V0.D[1], R11 // 0b3c184e
@@ -537,205 +579,28 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VMOV V9.H[0], V12.H[1] // 2c05066e
VMOV V8.B[0], V12.B[1] // 0c05036e
VMOV V8.B[7], V4.B[8] // 043d116e
- VREV32 V5.B16, V5.B16 // a508206e
- VREV64 V2.S2, V3.S2 // 4308a00e
- VREV64 V2.S4, V3.S4 // 4308a04e
- VDUP V19.S[0], V17.S4 // 7106044e
-//
-// B/BL
-//
-// LTYPE4 comma rel
-// {
-// outcode($1, &nullgen, NREG, &$3);
-// }
- BL 1(PC) // CALL 1(PC)
-// LTYPE4 comma nireg
-// {
-// outcode($1, &nullgen, NREG, &$3);
-// }
- BL (R2) // CALL (R2)
- BL foo(SB) // CALL foo(SB)
- BL bar<>(SB) // CALL bar<>(SB)
-//
-// BEQ
-//
-// LTYPE5 comma rel
-// {
-// outcode($1, &nullgen, NREG, &$3);
-// }
- BEQ 1(PC)
-//
-// SVC
-//
-// LTYPE6
-// {
-// outcode($1, &nullgen, NREG, &nullgen);
-// }
- SVC
-
-//
-// CMP
-//
-// LTYPE7 imsr ',' spreg comma
-// {
-// outcode($1, &$2, $4, &nullgen);
-// }
- CMP $3, R2
- CMP R1, R2
- CMP R1->11, R2
- CMP R1>>22, R2
- CMP R1<<33, R2
- CMP R22.SXTX, RSP // ffe336eb
-
- CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb
- CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b
-
-// TST
- TST $15, R2 // 5f0c40f2
- TST R1, R2 // 5f0001ea
- TST R1->11, R2 // 5f2c81ea
- TST R1>>22, R2 // 5f5841ea
- TST R1<<33, R2 // 5f8401ea
- TST $0x22220000, R3 // TST $572653568, R3 // 5b44a4d27f001bea
-
-//
// CBZ
-//
-// LTYPE8 reg ',' rel
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
again:
CBZ R1, again // CBZ R1
-//
-// CSET
-//
-// LTYPER cond ',' reg
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
- CSET GT, R1 // e1d79f9a
- CSETW HI, R2 // e2979f1a
-//
-// CSEL/CSINC/CSNEG/CSINV
-//
-// LTYPES cond ',' reg ',' reg ',' reg
-// {
-// outgcode($1, &$2, $6.reg, &$4, &$8);
-// }
+// conditional operations
+ CSET GT, R1 // e1d79f9a
+ CSETW HI, R2 // e2979f1a
CSEL LT, R1, R2, ZR // 3fb0829a
CSELW LT, R2, R3, R4 // 44b0831a
CSINC GT, R1, ZR, R3 // 23c49f9a
CSNEG MI, R1, R2, R3 // 234482da
CSINV CS, R1, R2, R3 // CSINV HS, R1, R2, R3 // 232082da
CSINVW MI, R2, ZR, R2 // 42409f5a
-
-// LTYPES cond ',' reg ',' reg
-// {
-// outcode($1, &$2, $4.reg, &$6);
-// }
CINC EQ, R4, R9 // 8914849a
CINCW PL, R2, ZR // 5f44821a
CINV PL, R11, R22 // 76418bda
CINVW LS, R7, R13 // ed80875a
CNEG LS, R13, R7 // a7858dda
CNEGW EQ, R8, R13 // 0d15885a
-//
-// CCMN
-//
-// LTYPEU cond ',' imsr ',' reg ',' imm comma
-// {
-// outgcode($1, &$2, $6.reg, &$4, &$8);
-// }
- CCMN MI, ZR, R1, $4 // e44341ba
-//
-// FADDD
-//
-// LTYPEK frcon ',' freg
-// {
-// outcode($1, &$2, NREG, &$4);
-// }
-// FADDD $0.5, F1 // FADDD $(0.5), F1
- FADDD F1, F2
-
-// LTYPEK frcon ',' freg ',' freg
-// {
-// outcode($1, &$2, $4.reg, &$6);
-// }
-// FADDD $0.7, F1, F2 // FADDD $(0.69999999999999996), F1, F2
- FADDD F1, F2, F3
-
-//
-// FCMP
-//
-// LTYPEL frcon ',' freg comma
-// {
-// outcode($1, &$2, $4.reg, &nullgen);
-// }
-// FCMP $0.2, F1
-// FCMP F1, F2
-
-//
-// FCCMP
-//
-// LTYPEF cond ',' freg ',' freg ',' imm comma
-// {
-// outgcode($1, &$2, $6.reg, &$4, &$8);
-// }
- FCCMPS LT, F1, F2, $1 // 41b4211e
-
-//
-// FMULA
-//
-// LTYPE9 freg ',' freg ',' freg ',' freg comma
-// {
-// outgcode($1, &$2, $4.reg, &$6, &$8);
-// }
-// FMULA F1, F2, F3, F4
-
-//
-// FCSEL
-//
-// LFCSEL cond ',' freg ',' freg ',' freg
-// {
-// outgcode($1, &$2, $6.reg, &$4, &$8);
-// }
-//
-// MADD Rn,Rm,Ra,Rd
-//
-// LTYPEM reg ',' reg ',' sreg ',' reg
-// {
-// outgcode($1, &$2, $6, &$4, &$8);
-// }
-// MADD R1, R2, R3, R4
-
- FMADDS F1, F3, F2, F4 // 440c011f
- FMADDD F4, F5, F4, F4 // 8414441f
- FMSUBS F13, F21, F13, F19 // b3d50d1f
- FMSUBD F11, F7, F15, F31 // ff9d4b1f
- FNMADDS F1, F3, F2, F4 // 440c211f
- FNMADDD F1, F3, F2, F4 // 440c611f
- FNMSUBS F1, F3, F2, F4 // 448c211f
- FNMSUBD F1, F3, F2, F4 // 448c611f
-
-// DMB, HINT
-//
-// LDMB imm
-// {
-// outcode($1, &$2, NREG, &nullgen);
-// }
- DMB $1
-
-//
-// STXR
-//
-// LSTXR reg ',' addr ',' reg
-// {
-// outcode($1, &$2, &$4, &$6);
-// }
+// atomic ops
LDARB (R25), R2 // 22ffdf08
LDARH (R5), R7 // a7fcdf48
LDAXPW (R10), (R20, R16) // 54c17f88
@@ -912,21 +777,36 @@ again:
LDORLH R5, (RSP), R7 // e7336578
LDORLB R5, (R6), R7 // c7306538
LDORLB R5, (RSP), R7 // e7336538
+ CASD R1, (R2), ZR // 5f7ca1c8
+ CASW R1, (RSP), ZR // ff7fa188
+ CASB ZR, (R5), R3 // a37cbf08
+ CASH R3, (RSP), ZR // ff7fa348
+ CASW R5, (R7), R6 // e67ca588
+ CASLD ZR, (RSP), R8 // e8ffbfc8
+ CASLW R9, (R10), ZR // 5ffda988
+ CASAD R7, (R11), R15 // 6f7de7c8
+ CASAW R10, (RSP), R19 // f37fea88
+ CASALD R5, (R6), R7 // c7fce5c8
+ CASALD R5, (RSP), R7 // e7ffe5c8
+ CASALW R5, (R6), R7 // c7fce588
+ CASALW R5, (RSP), R7 // e7ffe588
+ CASALH ZR, (R5), R8 // a8fcff48
+ CASALB R8, (R9), ZR // 3ffde808
+ CASPD (R30, ZR), (RSP), (R8, R9) // e87f3e48
+ CASPW (R6, R7), (R8), (R4, R5) // 047d2608
+ CASPD (R2, R3), (R2), (R8, R9) // 487c2248
+
// RET
-//
-// LTYPEA comma
-// {
-// outcode($1, &nullgen, NREG, &nullgen);
-// }
- BEQ 2(PC)
RET
RET foo(SB)
-// More B/BL cases, and canonical names JMP, CALL.
-
- BEQ 2(PC)
- B foo(SB) // JMP foo(SB)
- BL foo(SB) // CALL foo(SB)
+// B/BL/B.cond cases, and canonical names JMP, CALL.
+ BL 1(PC) // CALL 1(PC)
+ BL (R2) // CALL (R2)
+ BL foo(SB) // CALL foo(SB)
+ BL bar<>(SB) // CALL bar<>(SB)
+ B foo(SB) // JMP foo(SB)
+ BEQ 1(PC)
BEQ 2(PC)
TBZ $1, R1, 2(PC)
TBNZ $2, R2, 2(PC)
@@ -1101,8 +981,6 @@ again:
FSTPS (F3, F4), 1024(RSP) // fb0310916313002d
FSTPS (F3, F4), x(SB)
FSTPS (F3, F4), x+8(SB)
- NOOP // 1f2003d5
- HINT $0 // 1f2003d5
// System Register
MSR $1, SPSel // bf4100d5
@@ -1664,11 +1542,4 @@ again:
MSR R13, ZCR_EL1 // 0d1218d5
MRS ZCR_EL1, R23 // 171238d5
MSR R17, ZCR_EL1 // 111218d5
-
-// END
-//
-// LTYPEE comma
-// {
-// outcode($1, &nullgen, NREG, &nullgen);
-// }
END
diff --git a/src/cmd/asm/internal/asm/testdata/arm64error.s b/src/cmd/asm/internal/asm/testdata/arm64error.s
index 20b1f3e9f06..99e4d62d25b 100644
--- a/src/cmd/asm/internal/asm/testdata/arm64error.s
+++ b/src/cmd/asm/internal/asm/testdata/arm64error.s
@@ -87,13 +87,13 @@ TEXT errors(SB),$0
VLD1.P 32(R1), [V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset"
VLD1.P 48(R1), [V7.S4, V8.S4, V9.S4, V10.S4] // ERROR "invalid post-increment offset"
VPMULL V1.D1, V2.H4, V3.Q1 // ERROR "invalid arrangement"
- VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement"
- VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "invalid arrangement"
- VPMULL V1.B16, V2.B16, V3.H8 // ERROR "invalid arrangement"
+ VPMULL V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch"
+ VPMULL V1.D2, V2.D2, V3.Q1 // ERROR "operand mismatch"
+ VPMULL V1.B16, V2.B16, V3.H8 // ERROR "operand mismatch"
VPMULL2 V1.D2, V2.H4, V3.Q1 // ERROR "invalid arrangement"
- VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "invalid arrangement"
- VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "invalid arrangement"
- VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "invalid arrangement"
+ VPMULL2 V1.H4, V2.H4, V3.Q1 // ERROR "operand mismatch"
+ VPMULL2 V1.D1, V2.D1, V3.Q1 // ERROR "operand mismatch"
+ VPMULL2 V1.B8, V2.B8, V3.H8 // ERROR "operand mismatch"
VEXT $8, V1.B16, V2.B8, V2.B16 // ERROR "invalid arrangement"
VEXT $8, V1.H8, V2.H8, V2.H8 // ERROR "invalid arrangement"
VRBIT V1.B16, V2.B8 // ERROR "invalid arrangement"
@@ -353,4 +353,12 @@ TEXT errors(SB),$0
VUSHLL2 $32, V30.S4, V2.D2 // ERROR "shift amount out of range"
VBIF V0.B8, V1.B8, V2.B16 // ERROR "operand mismatch"
VBIF V0.D2, V1.D2, V2.D2 // ERROR "invalid arrangement"
+ VUADDW V9.B8, V12.H8, V14.B8 // ERROR "invalid arrangement"
+ VUADDW2 V9.B8, V12.S4, V14.S4 // ERROR "operand mismatch"
+ VSLI $64, V7.D2, V8.D2 // ERROR "shift out of range"
+ VUSRA $0, V7.D2, V8.D2 // ERROR "shift out of range"
+ CASPD (R3, R4), (R2), (R8, R9) // ERROR "source register pair must start from even register"
+ CASPD (R2, R3), (R2), (R9, R10) // ERROR "destination register pair must start from even register"
+ CASPD (R2, R4), (R2), (R8, R9) // ERROR "source register pair must be contiguous"
+ CASPD (R2, R3), (R2), (R8, R10) // ERROR "destination register pair must be contiguous"
RET
diff --git a/src/cmd/asm/internal/asm/testdata/riscvenc.s b/src/cmd/asm/internal/asm/testdata/riscvenc.s
index 8d301f2dd5e..e30a576473d 100644
--- a/src/cmd/asm/internal/asm/testdata/riscvenc.s
+++ b/src/cmd/asm/internal/asm/testdata/riscvenc.s
@@ -297,6 +297,13 @@ start:
MOVW X5, (X6) // 23205300
MOVW X5, 4(X6) // 23225300
+ MOVB X5, X6 // 1393820313538343
+ MOVH X5, X6 // 1393020313530343
+ MOVW X5, X6 // 1b830200
+ MOVBU X5, X6 // 13f3f20f
+ MOVHU X5, X6 // 1393020313530303
+ MOVWU X5, X6 // 1393020213530302
+
MOVF 4(X5), F0 // 07a04200
MOVF F0, 4(X5) // 27a20200
MOVF F0, F1 // d3000020
@@ -318,7 +325,7 @@ start:
// These jumps can get printed as jumps to 2 because they go to the
// second instruction in the function (the first instruction is an
// invisible stack pointer adjustment).
- JMP start // JMP 2 // 6ff01fc5
+ JMP start // JMP 2 // 6ff09fc2
JMP (X5) // 67800200
JMP 4(X5) // 67804200
@@ -331,16 +338,16 @@ start:
JMP asmtest(SB) // 970f0000
// Branch pseudo-instructions
- BEQZ X5, start // BEQZ X5, 2 // e38a02c2
- BGEZ X5, start // BGEZ X5, 2 // e3d802c2
- BGT X5, X6, start // BGT X5, X6, 2 // e3c662c2
- BGTU X5, X6, start // BGTU X5, X6, 2 // e3e462c2
- BGTZ X5, start // BGTZ X5, 2 // e34250c2
- BLE X5, X6, start // BLE X5, X6, 2 // e3d062c2
- BLEU X5, X6, start // BLEU X5, X6, 2 // e3fe62c0
- BLEZ X5, start // BLEZ X5, 2 // e35c50c0
- BLTZ X5, start // BLTZ X5, 2 // e3ca02c0
- BNEZ X5, start // BNEZ X5, 2 // e39802c0
+ BEQZ X5, start // BEQZ X5, 2 // e38602c0
+ BGEZ X5, start // BGEZ X5, 2 // e3d402c0
+ BGT X5, X6, start // BGT X5, X6, 2 // e3c262c0
+ BGTU X5, X6, start // BGTU X5, X6, 2 // e3e062c0
+ BGTZ X5, start // BGTZ X5, 2 // e34e50be
+ BLE X5, X6, start // BLE X5, X6, 2 // e3dc62be
+ BLEU X5, X6, start // BLEU X5, X6, 2 // e3fa62be
+ BLEZ X5, start // BLEZ X5, 2 // e35850be
+ BLTZ X5, start // BLTZ X5, 2 // e3c602be
+ BNEZ X5, start // BNEZ X5, 2 // e39402be
// Set pseudo-instructions
SEQZ X15, X15 // 93b71700
diff --git a/src/cmd/buildid/buildid.go b/src/cmd/buildid/buildid.go
index 1c7b228c987..699d9779508 100644
--- a/src/cmd/buildid/buildid.go
+++ b/src/cmd/buildid/buildid.go
@@ -22,21 +22,6 @@ func usage() {
var wflag = flag.Bool("w", false, "write build ID")
-// taken from cmd/go/internal/work/buildid.go
-func hashToString(h [32]byte) string {
- const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
- const chunks = 5
- var dst [chunks * 4]byte
- for i := 0; i < chunks; i++ {
- v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2])
- dst[4*i+0] = b64[(v>>18)&0x3F]
- dst[4*i+1] = b64[(v>>12)&0x3F]
- dst[4*i+2] = b64[(v>>6)&0x3F]
- dst[4*i+3] = b64[v&0x3F]
- }
- return string(dst[:])
-}
-
func main() {
log.SetPrefix("buildid: ")
log.SetFlags(0)
@@ -63,12 +48,12 @@ func main() {
log.Fatal(err)
}
matches, hash, err := buildid.FindAndHash(f, id, 0)
+ f.Close()
if err != nil {
log.Fatal(err)
}
- f.Close()
- newID := id[:strings.LastIndex(id, "/")] + "/" + hashToString(hash)
+ newID := id[:strings.LastIndex(id, "/")] + "/" + buildid.HashToString(hash)
if len(newID) != len(id) {
log.Fatalf("%s: build ID length mismatch %q vs %q", file, id, newID)
}
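
The buildid change above replaces the local hashToString copy with the exported buildid.HashToString. For reference, the encoding performed by the deleted helper (and presumably mirrored by its exported replacement) maps the first 15 bytes of a SHA-256 sum onto 20 base64-style characters; a minimal standalone sketch, using a hypothetical name so as not to restate the exported API:

package main

import (
	"crypto/sha256"
	"fmt"
)

// hashToString20 encodes the first 15 bytes of h as 20 base64-style
// characters, mirroring the helper deleted in the hunk above
// (hypothetical name, illustration only).
func hashToString20(h [32]byte) string {
	const b64 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
	const chunks = 5
	var dst [chunks * 4]byte
	for i := 0; i < chunks; i++ {
		v := uint32(h[3*i])<<16 | uint32(h[3*i+1])<<8 | uint32(h[3*i+2])
		dst[4*i+0] = b64[(v>>18)&0x3F]
		dst[4*i+1] = b64[(v>>12)&0x3F]
		dst[4*i+2] = b64[(v>>6)&0x3F]
		dst[4*i+3] = b64[v&0x3F]
	}
	return string(dst[:])
}

func main() {
	sum := sha256.Sum256([]byte("example content"))
	fmt.Println(hashToString20(sum)) // always 20 characters
}
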
diff --git a/src/cmd/cgo/ast.go b/src/cmd/cgo/ast.go
index 54d6bc2559d..a073407a961 100644
--- a/src/cmd/cgo/ast.go
+++ b/src/cmd/cgo/ast.go
@@ -13,7 +13,6 @@ import (
"go/scanner"
"go/token"
"os"
- "path/filepath"
"strings"
)
@@ -44,14 +43,7 @@ func sourceLine(n ast.Node) int {
// attached to the import "C" comment, a list of references to C.xxx,
// a list of exported functions, and the actual AST, to be rewritten and
// printed.
-func (f *File) ParseGo(name string, src []byte) {
- // Create absolute path for file, so that it will be used in error
- // messages and recorded in debug line number information.
- // This matches the rest of the toolchain. See golang.org/issue/5122.
- if aname, err := filepath.Abs(name); err == nil {
- name = aname
- }
-
+func (f *File) ParseGo(abspath string, src []byte) {
// Two different parses: once with comments, once without.
// The printer is not good enough at printing comments in the
// right place when we start editing the AST behind its back,
@@ -60,8 +52,8 @@ func (f *File) ParseGo(name string, src []byte) {
// and reprinting.
// In cgo mode, we ignore ast2 and just apply edits directly
// the text behind ast1. In godefs mode we modify and print ast2.
- ast1 := parse(name, src, parser.ParseComments)
- ast2 := parse(name, src, 0)
+ ast1 := parse(abspath, src, parser.ParseComments)
+ ast2 := parse(abspath, src, 0)
f.Package = ast1.Name.Name
f.Name = make(map[string]*Name)
@@ -88,7 +80,7 @@ func (f *File) ParseGo(name string, src []byte) {
cg = d.Doc
}
if cg != nil {
- f.Preamble += fmt.Sprintf("#line %d %q\n", sourceLine(cg), name)
+ f.Preamble += fmt.Sprintf("#line %d %q\n", sourceLine(cg), abspath)
f.Preamble += commentText(cg) + "\n"
f.Preamble += "#line 1 \"cgo-generated-wrapper\"\n"
}
diff --git a/src/cmd/cgo/doc.go b/src/cmd/cgo/doc.go
index b3f371b08c4..e782c866ac7 100644
--- a/src/cmd/cgo/doc.go
+++ b/src/cmd/cgo/doc.go
@@ -721,7 +721,7 @@ linkage to the desired libraries. The main function is provided by
_cgo_main.c:
int main() { return 0; }
- void crosscall2(void(*fn)(void*, int, uintptr_t), void *a, int c, uintptr_t ctxt) { }
+ void crosscall2(void(*fn)(void*), void *a, int c, uintptr_t ctxt) { }
uintptr_t _cgo_wait_runtime_init_done(void) { return 0; }
void _cgo_release_context(uintptr_t ctxt) { }
char* _cgo_topofstack(void) { return (char*)0; }
diff --git a/src/cmd/cgo/godefs.go b/src/cmd/cgo/godefs.go
index b4fd9c5a6e3..c0d59aee01d 100644
--- a/src/cmd/cgo/godefs.go
+++ b/src/cmd/cgo/godefs.go
@@ -16,7 +16,7 @@ import (
)
// godefs returns the output for -godefs mode.
-func (p *Package) godefs(f *File, srcfile string) string {
+func (p *Package) godefs(f *File) string {
var buf bytes.Buffer
fmt.Fprintf(&buf, "// Code generated by cmd/cgo -godefs; DO NOT EDIT.\n")
diff --git a/src/cmd/cgo/main.go b/src/cmd/cgo/main.go
index 7d02ac3c546..c1116e28ecd 100644
--- a/src/cmd/cgo/main.go
+++ b/src/cmd/cgo/main.go
@@ -243,6 +243,8 @@ var gccgopkgpath = flag.String("gccgopkgpath", "", "-fgo-pkgpath option used wit
var gccgoMangler func(string) string
var importRuntimeCgo = flag.Bool("import_runtime_cgo", true, "import runtime/cgo in generated code")
var importSyscall = flag.Bool("import_syscall", true, "import syscall in generated code")
+var trimpath = flag.String("trimpath", "", "applies supplied rewrites or trims prefixes to recorded source file paths")
+
var goarch, goos string
func main() {
@@ -322,6 +324,13 @@ func main() {
input = filepath.Join(*srcDir, input)
}
+ // Create absolute path for file, so that it will be used in error
+ // messages and recorded in debug line number information.
+ // This matches the rest of the toolchain. See golang.org/issue/5122.
+ if aname, err := filepath.Abs(input); err == nil {
+ input = aname
+ }
+
b, err := ioutil.ReadFile(input)
if err != nil {
fatalf("%s", err)
@@ -330,6 +339,10 @@ func main() {
fatalf("%s", err)
}
+ // Apply trimpath to the file path. The path won't be read from after this point.
+ input, _ = objabi.ApplyRewrites(input, *trimpath)
+ goFiles[i] = input
+
f := new(File)
f.Edit = edit.NewBuffer(b)
f.ParseGo(input, b)
@@ -367,7 +380,7 @@ func main() {
p.PackagePath = f.Package
p.Record(f)
if *godefs {
- os.Stdout.WriteString(p.godefs(f, input))
+ os.Stdout.WriteString(p.godefs(f))
} else {
p.writeOutput(f, input)
}
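
The new -trimpath flag hands the input path to objabi.ApplyRewrites before it is recorded. Assuming the rewrite syntax matches the rest of the toolchain (semicolon-separated rules, each either a bare prefix to strip or prefix=>replacement), the effect can be sketched by hand; applyRewrite below is a hypothetical stand-in, not the real implementation:

package main

import (
	"fmt"
	"strings"
)

// applyRewrite is an illustrative stand-in for the toolchain's path
// rewriting (hypothetical helper, not objabi.ApplyRewrites itself).
// Each semicolon-separated rule is either "prefix=>replacement" or a
// bare prefix to strip; the first matching rule wins.
func applyRewrite(path, rewrites string) string {
	for _, rule := range strings.Split(rewrites, ";") {
		prefix, repl := rule, ""
		if i := strings.Index(rule, "=>"); i >= 0 {
			prefix, repl = rule[:i], rule[i+2:]
		}
		if prefix != "" && strings.HasPrefix(path, prefix) {
			return repl + path[len(prefix):]
		}
	}
	return path
}

func main() {
	fmt.Println(applyRewrite("/home/user/proj/main.go", "/home/user/proj=>proj"))
	// Output: proj/main.go
}
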
diff --git a/src/cmd/cgo/out.go b/src/cmd/cgo/out.go
index b447b076450..eef54f2d0f6 100644
--- a/src/cmd/cgo/out.go
+++ b/src/cmd/cgo/out.go
@@ -59,14 +59,14 @@ func (p *Package) writeDefs() {
// Write C main file for using gcc to resolve imports.
fmt.Fprintf(fm, "int main() { return 0; }\n")
if *importRuntimeCgo {
- fmt.Fprintf(fm, "void crosscall2(void(*fn)(void*, int, __SIZE_TYPE__), void *a, int c, __SIZE_TYPE__ ctxt) { }\n")
+ fmt.Fprintf(fm, "void crosscall2(void(*fn)(void*), void *a, int c, __SIZE_TYPE__ ctxt) { }\n")
fmt.Fprintf(fm, "__SIZE_TYPE__ _cgo_wait_runtime_init_done(void) { return 0; }\n")
fmt.Fprintf(fm, "void _cgo_release_context(__SIZE_TYPE__ ctxt) { }\n")
fmt.Fprintf(fm, "char* _cgo_topofstack(void) { return (char*)0; }\n")
} else {
// If we're not importing runtime/cgo, we *are* runtime/cgo,
// which provides these functions. We just need a prototype.
- fmt.Fprintf(fm, "void crosscall2(void(*fn)(void*, int, __SIZE_TYPE__), void *a, int c, __SIZE_TYPE__ ctxt);\n")
+ fmt.Fprintf(fm, "void crosscall2(void(*fn)(void*), void *a, int c, __SIZE_TYPE__ ctxt);\n")
fmt.Fprintf(fm, "__SIZE_TYPE__ _cgo_wait_runtime_init_done(void);\n")
fmt.Fprintf(fm, "void _cgo_release_context(__SIZE_TYPE__);\n")
}
@@ -852,7 +852,7 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
fmt.Fprintf(fgcc, "#pragma GCC diagnostic ignored \"-Wpragmas\"\n")
fmt.Fprintf(fgcc, "#pragma GCC diagnostic ignored \"-Waddress-of-packed-member\"\n")
- fmt.Fprintf(fgcc, "extern void crosscall2(void (*fn)(void *, int, __SIZE_TYPE__), void *, int, __SIZE_TYPE__);\n")
+ fmt.Fprintf(fgcc, "extern void crosscall2(void (*fn)(void *), void *, int, __SIZE_TYPE__);\n")
fmt.Fprintf(fgcc, "extern __SIZE_TYPE__ _cgo_wait_runtime_init_done(void);\n")
fmt.Fprintf(fgcc, "extern void _cgo_release_context(__SIZE_TYPE__);\n\n")
fmt.Fprintf(fgcc, "extern char* _cgo_topofstack(void);")
@@ -862,59 +862,48 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
for _, exp := range p.ExpFunc {
fn := exp.Func
- // Construct a gcc struct matching the gc argument and
- // result frame. The gcc struct will be compiled with
- // __attribute__((packed)) so all padding must be accounted
- // for explicitly.
+ // Construct a struct that will be used to communicate
+ // arguments from C to Go. The C and Go definitions
+ // just have to agree. The gcc struct will be compiled
+ // with __attribute__((packed)) so all padding must be
+ // accounted for explicitly.
ctype := "struct {\n"
+ gotype := new(bytes.Buffer)
+ fmt.Fprintf(gotype, "struct {\n")
off := int64(0)
npad := 0
- if fn.Recv != nil {
- t := p.cgoType(fn.Recv.List[0].Type)
- ctype += fmt.Sprintf("\t\t%s recv;\n", t.C)
+ argField := func(typ ast.Expr, namePat string, args ...interface{}) {
+ name := fmt.Sprintf(namePat, args...)
+ t := p.cgoType(typ)
+ if off%t.Align != 0 {
+ pad := t.Align - off%t.Align
+ ctype += fmt.Sprintf("\t\tchar __pad%d[%d];\n", npad, pad)
+ off += pad
+ npad++
+ }
+ ctype += fmt.Sprintf("\t\t%s %s;\n", t.C, name)
+ fmt.Fprintf(gotype, "\t\t%s ", name)
+ noSourceConf.Fprint(gotype, fset, typ)
+ fmt.Fprintf(gotype, "\n")
off += t.Size
}
+ if fn.Recv != nil {
+ argField(fn.Recv.List[0].Type, "recv")
+ }
fntype := fn.Type
forFieldList(fntype.Params,
func(i int, aname string, atype ast.Expr) {
- t := p.cgoType(atype)
- if off%t.Align != 0 {
- pad := t.Align - off%t.Align
- ctype += fmt.Sprintf("\t\tchar __pad%d[%d];\n", npad, pad)
- off += pad
- npad++
- }
- ctype += fmt.Sprintf("\t\t%s p%d;\n", t.C, i)
- off += t.Size
+ argField(atype, "p%d", i)
})
- if off%p.PtrSize != 0 {
- pad := p.PtrSize - off%p.PtrSize
- ctype += fmt.Sprintf("\t\tchar __pad%d[%d];\n", npad, pad)
- off += pad
- npad++
- }
forFieldList(fntype.Results,
func(i int, aname string, atype ast.Expr) {
- t := p.cgoType(atype)
- if off%t.Align != 0 {
- pad := t.Align - off%t.Align
- ctype += fmt.Sprintf("\t\tchar __pad%d[%d];\n", npad, pad)
- off += pad
- npad++
- }
- ctype += fmt.Sprintf("\t\t%s r%d;\n", t.C, i)
- off += t.Size
+ argField(atype, "r%d", i)
})
- if off%p.PtrSize != 0 {
- pad := p.PtrSize - off%p.PtrSize
- ctype += fmt.Sprintf("\t\tchar __pad%d[%d];\n", npad, pad)
- off += pad
- npad++
- }
if ctype == "struct {\n" {
ctype += "\t\tchar unused;\n" // avoid empty struct
}
ctype += "\t}"
+ fmt.Fprintf(gotype, "\t}")
// Get the return type of the wrapper function
// compiled by gcc.
@@ -939,7 +928,11 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
}
// Build the wrapper function compiled by gcc.
- s := fmt.Sprintf("%s %s(", gccResult, exp.ExpName)
+ gccExport := ""
+ if goos == "windows" {
+ gccExport = "__declspec(dllexport)"
+ }
+ s := fmt.Sprintf("%s %s %s(", gccExport, gccResult, exp.ExpName)
if fn.Recv != nil {
s += p.cgoType(fn.Recv.List[0].Type).C.String()
s += " recv"
@@ -961,12 +954,15 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
}
fmt.Fprintf(fgcch, "extern %s;\n", s)
- fmt.Fprintf(fgcc, "extern void _cgoexp%s_%s(void *, int, __SIZE_TYPE__);\n", cPrefix, exp.ExpName)
+ fmt.Fprintf(fgcc, "extern void _cgoexp%s_%s(void *);\n", cPrefix, exp.ExpName)
fmt.Fprintf(fgcc, "\nCGO_NO_SANITIZE_THREAD")
fmt.Fprintf(fgcc, "\n%s\n", s)
fmt.Fprintf(fgcc, "{\n")
fmt.Fprintf(fgcc, "\t__SIZE_TYPE__ _cgo_ctxt = _cgo_wait_runtime_init_done();\n")
- fmt.Fprintf(fgcc, "\t%s %v _cgo_a;\n", ctype, p.packedAttribute())
+ // The results part of the argument structure must be
+ // initialized to 0 so the write barriers generated by
+ // the assignments to these fields in Go are safe.
+ fmt.Fprintf(fgcc, "\t%s %v _cgo_a = {0};\n", ctype, p.packedAttribute())
if gccResult != "void" && (len(fntype.Results.List) > 1 || len(fntype.Results.List[0].Names) > 1) {
fmt.Fprintf(fgcc, "\t%s r;\n", gccResult)
}
@@ -995,82 +991,28 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
fmt.Fprintf(fgcc, "}\n")
// Build the wrapper function compiled by cmd/compile.
- goname := "_cgoexpwrap" + cPrefix + "_"
- if fn.Recv != nil {
- goname += fn.Recv.List[0].Names[0].Name + "_"
- }
- goname += exp.Func.Name.Name
+ // This unpacks the argument struct above and calls the Go function.
fmt.Fprintf(fgo2, "//go:cgo_export_dynamic %s\n", exp.ExpName)
fmt.Fprintf(fgo2, "//go:linkname _cgoexp%s_%s _cgoexp%s_%s\n", cPrefix, exp.ExpName, cPrefix, exp.ExpName)
fmt.Fprintf(fgo2, "//go:cgo_export_static _cgoexp%s_%s\n", cPrefix, exp.ExpName)
- fmt.Fprintf(fgo2, "//go:nosplit\n") // no split stack, so no use of m or g
- fmt.Fprintf(fgo2, "//go:norace\n") // must not have race detector calls inserted
- fmt.Fprintf(fgo2, "func _cgoexp%s_%s(a unsafe.Pointer, n int32, ctxt uintptr) {\n", cPrefix, exp.ExpName)
- fmt.Fprintf(fgo2, "\tfn := %s\n", goname)
- // The indirect here is converting from a Go function pointer to a C function pointer.
- fmt.Fprintf(fgo2, "\t_cgo_runtime_cgocallback(**(**unsafe.Pointer)(unsafe.Pointer(&fn)), a, uintptr(n), ctxt);\n")
- fmt.Fprintf(fgo2, "}\n")
+ fmt.Fprintf(fgo2, "func _cgoexp%s_%s(a *%s) {\n", cPrefix, exp.ExpName, gotype)
fmt.Fprintf(fm, "int _cgoexp%s_%s;\n", cPrefix, exp.ExpName)
- // This code uses printer.Fprint, not conf.Fprint,
- // because we don't want //line comments in the middle
- // of the function types.
- fmt.Fprintf(fgo2, "\n")
- fmt.Fprintf(fgo2, "func %s(", goname)
- comma := false
- if fn.Recv != nil {
- fmt.Fprintf(fgo2, "recv ")
- printer.Fprint(fgo2, fset, fn.Recv.List[0].Type)
- comma = true
- }
- forFieldList(fntype.Params,
- func(i int, aname string, atype ast.Expr) {
- if comma {
- fmt.Fprintf(fgo2, ", ")
- }
- fmt.Fprintf(fgo2, "p%d ", i)
- printer.Fprint(fgo2, fset, atype)
- comma = true
- })
- fmt.Fprintf(fgo2, ")")
if gccResult != "void" {
- fmt.Fprint(fgo2, " (")
+ // Write results back to frame.
+ fmt.Fprintf(fgo2, "\t")
forFieldList(fntype.Results,
func(i int, aname string, atype ast.Expr) {
if i > 0 {
- fmt.Fprint(fgo2, ", ")
+ fmt.Fprintf(fgo2, ", ")
}
- fmt.Fprintf(fgo2, "r%d ", i)
- printer.Fprint(fgo2, fset, atype)
+ fmt.Fprintf(fgo2, "a.r%d", i)
})
- fmt.Fprint(fgo2, ")")
- }
- fmt.Fprint(fgo2, " {\n")
- if gccResult == "void" {
- fmt.Fprint(fgo2, "\t")
- } else {
- // Verify that any results don't contain any
- // Go pointers.
- addedDefer := false
- forFieldList(fntype.Results,
- func(i int, aname string, atype ast.Expr) {
- if !p.hasPointer(nil, atype, false) {
- return
- }
- if !addedDefer {
- fmt.Fprint(fgo2, "\tdefer func() {\n")
- addedDefer = true
- }
- fmt.Fprintf(fgo2, "\t\t_cgoCheckResult(r%d)\n", i)
- })
- if addedDefer {
- fmt.Fprint(fgo2, "\t}()\n")
- }
- fmt.Fprint(fgo2, "\treturn ")
+ fmt.Fprintf(fgo2, " = ")
}
if fn.Recv != nil {
- fmt.Fprintf(fgo2, "recv.")
+ fmt.Fprintf(fgo2, "a.recv.")
}
fmt.Fprintf(fgo2, "%s(", exp.Func.Name)
forFieldList(fntype.Params,
@@ -1078,9 +1020,20 @@ func (p *Package) writeExports(fgo2, fm, fgcc, fgcch io.Writer) {
if i > 0 {
fmt.Fprint(fgo2, ", ")
}
- fmt.Fprintf(fgo2, "p%d", i)
+ fmt.Fprintf(fgo2, "a.p%d", i)
})
fmt.Fprint(fgo2, ")\n")
+ if gccResult != "void" {
+ // Verify that any results don't contain any
+ // Go pointers.
+ forFieldList(fntype.Results,
+ func(i int, aname string, atype ast.Expr) {
+ if !p.hasPointer(nil, atype, false) {
+ return
+ }
+ fmt.Fprintf(fgo2, "\t_cgoCheckResult(a.r%d)\n", i)
+ })
+ }
fmt.Fprint(fgo2, "}\n")
}
@@ -1578,9 +1531,6 @@ const goProlog = `
//go:linkname _cgo_runtime_cgocall runtime.cgocall
func _cgo_runtime_cgocall(unsafe.Pointer, uintptr) int32
-//go:linkname _cgo_runtime_cgocallback runtime.cgocallback
-func _cgo_runtime_cgocallback(unsafe.Pointer, unsafe.Pointer, uintptr, uintptr)
-
//go:linkname _cgoCheckPointer runtime.cgoCheckPointer
func _cgoCheckPointer(interface{}, interface{})
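
With the writeExports rewrite above, the gcc-compiled wrapper fills a single packed argument struct and passes its address to a Go wrapper that unpacks the fields, calls the exported function, and writes the results back. Roughly, for a hypothetical exported func Add(p0, p1 int32) int32, the Go side is equivalent to the hand-written sketch below (the generated prefix in the wrapper name is invented here, and the linkname/export pragmas are omitted):

package main

import "fmt"

// Add stands in for a hypothetical exported function:
//
//	//export Add
//	func Add(p0, p1 int32) int32
func Add(p0, p1 int32) int32 { return p0 + p1 }

// _cgoexp_example_Add approximates the generated Go-side wrapper: it
// receives a pointer to the argument struct filled in by the C side,
// unpacks the fields, calls the exported function, and stores the
// result back into the struct ("example" is a hypothetical prefix).
func _cgoexp_example_Add(a *struct {
	p0 int32
	p1 int32
	r0 int32
}) {
	a.r0 = Add(a.p0, a.p1)
}

func main() {
	args := struct {
		p0 int32
		p1 int32
		r0 int32
	}{p0: 2, p1: 3}
	_cgoexp_example_Add(&args)
	fmt.Println(args.r0) // 5
}
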
diff --git a/src/cmd/compile/fmtmap_test.go b/src/cmd/compile/fmtmap_test.go
index bcc82dda2fe..a7c1eca9dbe 100644
--- a/src/cmd/compile/fmtmap_test.go
+++ b/src/cmd/compile/fmtmap_test.go
@@ -133,10 +133,8 @@ var knownFormats = map[string]string{
"cmd/compile/internal/ssa.GCNode %v": "",
"cmd/compile/internal/ssa.ID %d": "",
"cmd/compile/internal/ssa.ID %v": "",
- "cmd/compile/internal/ssa.LocPair %s": "",
"cmd/compile/internal/ssa.LocalSlot %s": "",
"cmd/compile/internal/ssa.LocalSlot %v": "",
- "cmd/compile/internal/ssa.Location %T": "",
"cmd/compile/internal/ssa.Location %s": "",
"cmd/compile/internal/ssa.Op %s": "",
"cmd/compile/internal/ssa.Op %v": "",
diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 4ac877986cd..76e33a36899 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -42,10 +42,11 @@ func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
// Avoid partial register write
- if !t.IsFloat() && t.Size() <= 2 {
- if t.Size() == 1 {
+ if !t.IsFloat() {
+ switch t.Size() {
+ case 1:
return x86.AMOVBLZX
- } else {
+ case 2:
return x86.AMOVWLZX
}
}
@@ -1070,7 +1071,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p := s.Prog(v.Op.Asm())
val := v.AuxInt
// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
- if val != 0 && val != 1 && val != 2 && val != 3 {
+ if val < 0 || val > 3 {
v.Fatalf("Invalid rounding mode")
}
p.From.Offset = val
@@ -1210,7 +1211,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p = s.Prog(x86.ASETEQ)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
- case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
+ case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock:
s.Prog(x86.ALOCK)
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 1d6ea6b9d8e..5c695ef84cb 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -688,15 +688,23 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p5.To.Reg = out
gc.Patch(p2, p5)
case ssa.OpARM64LoweredAtomicAnd8,
- ssa.OpARM64LoweredAtomicOr8:
- // LDAXRB (Rarg0), Rout
+ ssa.OpARM64LoweredAtomicAnd32,
+ ssa.OpARM64LoweredAtomicOr8,
+ ssa.OpARM64LoweredAtomicOr32:
+ // LDAXRB/LDAXRW (Rarg0), Rout
// AND/OR Rarg1, Rout
- // STLXRB Rout, (Rarg0), Rtmp
+ // STLXRB/STLXRW Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
+ ld := arm64.ALDAXRB
+ st := arm64.ASTLXRB
+ if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
+ ld = arm64.ALDAXRW
+ st = arm64.ASTLXRW
+ }
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
out := v.Reg0()
- p := s.Prog(arm64.ALDAXRB)
+ p := s.Prog(ld)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r0
p.To.Type = obj.TYPE_REG
@@ -706,7 +714,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p1.From.Reg = r1
p1.To.Type = obj.TYPE_REG
p1.To.Reg = out
- p2 := s.Prog(arm64.ASTLXRB)
+ p2 := s.Prog(st)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = out
p2.To.Type = obj.TYPE_MEM
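
The arm64 backend now lowers the 32-bit atomic AND/OR ops the same way as the 8-bit forms, via an LDAXRW/STLXRW retry loop. As a rough Go-level sketch of the semantics (not of the lowering itself), the loop below models the load-exclusive/store-exclusive retry with a compare-and-swap; and32 is an illustrative helper, not a runtime API:

package main

import (
	"fmt"
	"sync/atomic"
)

// and32 approximates what the generated LDAXRW/AND/STLXRW/CBNZ sequence
// computes: it atomically replaces *addr with *addr & mask and leaves
// the updated value in the output register, here modelled by a CAS loop.
func and32(addr *uint32, mask uint32) uint32 {
	for {
		old := atomic.LoadUint32(addr)
		updated := old & mask
		if atomic.CompareAndSwapUint32(addr, old, updated) {
			return updated
		}
	}
}

func main() {
	x := uint32(0xff)
	fmt.Printf("%#x\n", and32(&x, 0x0f)) // 0xf
}
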
diff --git a/src/cmd/compile/internal/gc/alg.go b/src/cmd/compile/internal/gc/alg.go
index 2ab69c2c564..2f7fa27bb91 100644
--- a/src/cmd/compile/internal/gc/alg.go
+++ b/src/cmd/compile/internal/gc/alg.go
@@ -529,6 +529,10 @@ func geneq(t *types.Type) *obj.LSym {
fn := dclfunc(sym, tfn)
np := asNode(tfn.Type.Params().Field(0).Nname)
nq := asNode(tfn.Type.Params().Field(1).Nname)
+ nr := asNode(tfn.Type.Results().Field(0).Nname)
+
+ // Label to jump to if an equality test fails.
+ neq := autolabel(".neq")
// We reach here only for types that have equality but
// cannot be handled by the standard algorithms,
@@ -555,13 +559,13 @@ func geneq(t *types.Type) *obj.LSym {
// for i := 0; i < nelem; i++ {
// if eq(p[i], q[i]) {
// } else {
- // return
+ // goto neq
// }
// }
//
// TODO(josharian): consider doing some loop unrolling
// for larger nelem as well, processing a few elements at a time in a loop.
- checkAll := func(unroll int64, eq func(pi, qi *Node) *Node) {
+ checkAll := func(unroll int64, last bool, eq func(pi, qi *Node) *Node) {
// checkIdx generates a node to check for equality at index i.
checkIdx := func(i *Node) *Node {
// pi := p[i]
@@ -576,37 +580,38 @@ func geneq(t *types.Type) *obj.LSym {
}
if nelem <= unroll {
- // Generate a series of checks.
- var cond *Node
- for i := int64(0); i < nelem; i++ {
- c := nodintconst(i)
- check := checkIdx(c)
- if cond == nil {
- cond = check
- continue
- }
- cond = nod(OANDAND, cond, check)
+ if last {
+ // Do last comparison in a different manner.
+ nelem--
+ }
+ // Generate a series of checks.
+ for i := int64(0); i < nelem; i++ {
+ // if check {} else { goto neq }
+ nif := nod(OIF, checkIdx(nodintconst(i)), nil)
+ nif.Rlist.Append(nodSym(OGOTO, nil, neq))
+ fn.Nbody.Append(nif)
+ }
+ if last {
+ fn.Nbody.Append(nod(OAS, nr, checkIdx(nodintconst(nelem))))
+ }
+ } else {
+ // Generate a for loop.
+ // for i := 0; i < nelem; i++
+ i := temp(types.Types[TINT])
+ init := nod(OAS, i, nodintconst(0))
+ cond := nod(OLT, i, nodintconst(nelem))
+ post := nod(OAS, i, nod(OADD, i, nodintconst(1)))
+ loop := nod(OFOR, cond, post)
+ loop.Ninit.Append(init)
+ // if eq(pi, qi) {} else { goto neq }
+ nif := nod(OIF, checkIdx(i), nil)
+ nif.Rlist.Append(nodSym(OGOTO, nil, neq))
+ loop.Nbody.Append(nif)
+ fn.Nbody.Append(loop)
+ if last {
+ fn.Nbody.Append(nod(OAS, nr, nodbool(true)))
}
- nif := nod(OIF, cond, nil)
- nif.Rlist.Append(nod(ORETURN, nil, nil))
- fn.Nbody.Append(nif)
- return
}
-
- // Generate a for loop.
- // for i := 0; i < nelem; i++
- i := temp(types.Types[TINT])
- init := nod(OAS, i, nodintconst(0))
- cond := nod(OLT, i, nodintconst(nelem))
- post := nod(OAS, i, nod(OADD, i, nodintconst(1)))
- loop := nod(OFOR, cond, post)
- loop.Ninit.Append(init)
- // if eq(pi, qi) {} else { return }
- check := checkIdx(i)
- nif := nod(OIF, check, nil)
- nif.Rlist.Append(nod(ORETURN, nil, nil))
- loop.Nbody.Append(nif)
- fn.Nbody.Append(loop)
}
switch t.Elem().Etype {
@@ -614,32 +619,28 @@ func geneq(t *types.Type) *obj.LSym {
// Do two loops. First, check that all the lengths match (cheap).
// Second, check that all the contents match (expensive).
// TODO: when the array size is small, unroll the length match checks.
- checkAll(3, func(pi, qi *Node) *Node {
+ checkAll(3, false, func(pi, qi *Node) *Node {
// Compare lengths.
eqlen, _ := eqstring(pi, qi)
return eqlen
})
- checkAll(1, func(pi, qi *Node) *Node {
+ checkAll(1, true, func(pi, qi *Node) *Node {
// Compare contents.
_, eqmem := eqstring(pi, qi)
return eqmem
})
case TFLOAT32, TFLOAT64:
- checkAll(2, func(pi, qi *Node) *Node {
+ checkAll(2, true, func(pi, qi *Node) *Node {
// p[i] == q[i]
return nod(OEQ, pi, qi)
})
// TODO: pick apart structs, do them piecemeal too
default:
- checkAll(1, func(pi, qi *Node) *Node {
+ checkAll(1, true, func(pi, qi *Node) *Node {
// p[i] == q[i]
return nod(OEQ, pi, qi)
})
}
- // return true
- ret := nod(ORETURN, nil, nil)
- ret.List.Append(nodbool(true))
- fn.Nbody.Append(ret)
case TSTRUCT:
// Build a list of conditions to satisfy.
@@ -717,21 +718,41 @@ func geneq(t *types.Type) *obj.LSym {
flatConds = append(flatConds, c...)
}
- var cond *Node
if len(flatConds) == 0 {
- cond = nodbool(true)
+ fn.Nbody.Append(nod(OAS, nr, nodbool(true)))
} else {
- cond = flatConds[0]
- for _, c := range flatConds[1:] {
- cond = nod(OANDAND, cond, c)
+ for _, c := range flatConds[:len(flatConds)-1] {
+ // if cond {} else { goto neq }
+ n := nod(OIF, c, nil)
+ n.Rlist.Append(nodSym(OGOTO, nil, neq))
+ fn.Nbody.Append(n)
}
+ fn.Nbody.Append(nod(OAS, nr, flatConds[len(flatConds)-1]))
}
-
- ret := nod(ORETURN, nil, nil)
- ret.List.Append(cond)
- fn.Nbody.Append(ret)
}
+ // ret:
+ // return
+ ret := autolabel(".ret")
+ fn.Nbody.Append(nodSym(OLABEL, nil, ret))
+ fn.Nbody.Append(nod(ORETURN, nil, nil))
+
+ // neq:
+ // r = false
+ // return (or goto ret)
+ fn.Nbody.Append(nodSym(OLABEL, nil, neq))
+ fn.Nbody.Append(nod(OAS, nr, nodbool(false)))
+ if EqCanPanic(t) || hasCall(fn) {
+ // Epilogue is large, so share it with the equal case.
+ fn.Nbody.Append(nodSym(OGOTO, nil, ret))
+ } else {
+ // Epilogue is small, so don't bother sharing.
+ fn.Nbody.Append(nod(ORETURN, nil, nil))
+ }
+ // TODO(khr): the epilogue size detection condition above isn't perfect.
+ // We should really do a generic CL that shares epilogues across
+ // the board. See #24936.
+
if Debug.r != 0 {
dumplist("geneq body", fn.Nbody)
}
@@ -762,6 +783,39 @@ func geneq(t *types.Type) *obj.LSym {
return closure
}
+func hasCall(n *Node) bool {
+ if n.Op == OCALL || n.Op == OCALLFUNC {
+ return true
+ }
+ if n.Left != nil && hasCall(n.Left) {
+ return true
+ }
+ if n.Right != nil && hasCall(n.Right) {
+ return true
+ }
+ for _, x := range n.Ninit.Slice() {
+ if hasCall(x) {
+ return true
+ }
+ }
+ for _, x := range n.Nbody.Slice() {
+ if hasCall(x) {
+ return true
+ }
+ }
+ for _, x := range n.List.Slice() {
+ if hasCall(x) {
+ return true
+ }
+ }
+ for _, x := range n.Rlist.Slice() {
+ if hasCall(x) {
+ return true
+ }
+ }
+ return false
+}
+
// eqfield returns the node
// p.field == q.field
func eqfield(p *Node, q *Node, field *types.Sym) *Node {
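
The geneq rewrite above replaces early returns with branches to a shared neq label and assigns the final comparison directly to the result variable. For a small float array (float arrays unroll up to two elements, per the checkAll(2, true, ...) call), the generated code is roughly equivalent to this hand-written sketch; larger arrays use a for loop whose body takes the same goto neq on failure:

package main

import "fmt"

// arrayEq2 illustrates, by hand, the control flow geneq now builds as
// compiler IR: failed checks jump to the shared neq label, the last
// check assigns the result, and ret/neq form the epilogue. The neq
// block either returns directly or jumps to ret depending on
// EqCanPanic/hasCall; this sketch uses the shared-epilogue form.
func arrayEq2(p, q *[2]float64) (r bool) {
	if p[0] == q[0] {
	} else {
		goto neq
	}
	r = p[1] == q[1]
ret:
	return
neq:
	r = false
	goto ret
}

func main() {
	a := [2]float64{1, 2}
	b := [2]float64{1, 3}
	fmt.Println(arrayEq2(&a, &a), arrayEq2(&a, &b)) // true false
}
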
diff --git a/src/cmd/compile/internal/gc/builtin.go b/src/cmd/compile/internal/gc/builtin.go
index da7b107bfe7..fd95b657b26 100644
--- a/src/cmd/compile/internal/gc/builtin.go
+++ b/src/cmd/compile/internal/gc/builtin.go
@@ -44,6 +44,7 @@ var runtimeDecls = [...]struct {
{"printcomplex", funcTag, 27},
{"printstring", funcTag, 29},
{"printpointer", funcTag, 30},
+ {"printuintptr", funcTag, 31},
{"printiface", funcTag, 30},
{"printeface", funcTag, 30},
{"printslice", funcTag, 30},
@@ -51,134 +52,134 @@ var runtimeDecls = [...]struct {
{"printsp", funcTag, 9},
{"printlock", funcTag, 9},
{"printunlock", funcTag, 9},
- {"concatstring2", funcTag, 33},
- {"concatstring3", funcTag, 34},
- {"concatstring4", funcTag, 35},
- {"concatstring5", funcTag, 36},
- {"concatstrings", funcTag, 38},
- {"cmpstring", funcTag, 39},
- {"intstring", funcTag, 42},
- {"slicebytetostring", funcTag, 43},
- {"slicebytetostringtmp", funcTag, 44},
- {"slicerunetostring", funcTag, 47},
- {"stringtoslicebyte", funcTag, 49},
- {"stringtoslicerune", funcTag, 52},
- {"slicecopy", funcTag, 53},
- {"decoderune", funcTag, 54},
- {"countrunes", funcTag, 55},
- {"convI2I", funcTag, 56},
- {"convT16", funcTag, 57},
- {"convT32", funcTag, 57},
- {"convT64", funcTag, 57},
- {"convTstring", funcTag, 57},
- {"convTslice", funcTag, 57},
- {"convT2E", funcTag, 58},
- {"convT2Enoptr", funcTag, 58},
- {"convT2I", funcTag, 58},
- {"convT2Inoptr", funcTag, 58},
- {"assertE2I", funcTag, 56},
- {"assertE2I2", funcTag, 59},
- {"assertI2I", funcTag, 56},
- {"assertI2I2", funcTag, 59},
- {"panicdottypeE", funcTag, 60},
- {"panicdottypeI", funcTag, 60},
- {"panicnildottype", funcTag, 61},
- {"ifaceeq", funcTag, 63},
- {"efaceeq", funcTag, 63},
- {"fastrand", funcTag, 65},
- {"makemap64", funcTag, 67},
- {"makemap", funcTag, 68},
- {"makemap_small", funcTag, 69},
- {"mapaccess1", funcTag, 70},
- {"mapaccess1_fast32", funcTag, 71},
- {"mapaccess1_fast64", funcTag, 71},
- {"mapaccess1_faststr", funcTag, 71},
- {"mapaccess1_fat", funcTag, 72},
- {"mapaccess2", funcTag, 73},
- {"mapaccess2_fast32", funcTag, 74},
- {"mapaccess2_fast64", funcTag, 74},
- {"mapaccess2_faststr", funcTag, 74},
- {"mapaccess2_fat", funcTag, 75},
- {"mapassign", funcTag, 70},
- {"mapassign_fast32", funcTag, 71},
- {"mapassign_fast32ptr", funcTag, 71},
- {"mapassign_fast64", funcTag, 71},
- {"mapassign_fast64ptr", funcTag, 71},
- {"mapassign_faststr", funcTag, 71},
- {"mapiterinit", funcTag, 76},
- {"mapdelete", funcTag, 76},
- {"mapdelete_fast32", funcTag, 77},
- {"mapdelete_fast64", funcTag, 77},
- {"mapdelete_faststr", funcTag, 77},
- {"mapiternext", funcTag, 78},
- {"mapclear", funcTag, 79},
- {"makechan64", funcTag, 81},
- {"makechan", funcTag, 82},
- {"chanrecv1", funcTag, 84},
- {"chanrecv2", funcTag, 85},
- {"chansend1", funcTag, 87},
+ {"concatstring2", funcTag, 34},
+ {"concatstring3", funcTag, 35},
+ {"concatstring4", funcTag, 36},
+ {"concatstring5", funcTag, 37},
+ {"concatstrings", funcTag, 39},
+ {"cmpstring", funcTag, 40},
+ {"intstring", funcTag, 43},
+ {"slicebytetostring", funcTag, 44},
+ {"slicebytetostringtmp", funcTag, 45},
+ {"slicerunetostring", funcTag, 48},
+ {"stringtoslicebyte", funcTag, 50},
+ {"stringtoslicerune", funcTag, 53},
+ {"slicecopy", funcTag, 54},
+ {"decoderune", funcTag, 55},
+ {"countrunes", funcTag, 56},
+ {"convI2I", funcTag, 57},
+ {"convT16", funcTag, 58},
+ {"convT32", funcTag, 58},
+ {"convT64", funcTag, 58},
+ {"convTstring", funcTag, 58},
+ {"convTslice", funcTag, 58},
+ {"convT2E", funcTag, 59},
+ {"convT2Enoptr", funcTag, 59},
+ {"convT2I", funcTag, 59},
+ {"convT2Inoptr", funcTag, 59},
+ {"assertE2I", funcTag, 57},
+ {"assertE2I2", funcTag, 60},
+ {"assertI2I", funcTag, 57},
+ {"assertI2I2", funcTag, 60},
+ {"panicdottypeE", funcTag, 61},
+ {"panicdottypeI", funcTag, 61},
+ {"panicnildottype", funcTag, 62},
+ {"ifaceeq", funcTag, 64},
+ {"efaceeq", funcTag, 64},
+ {"fastrand", funcTag, 66},
+ {"makemap64", funcTag, 68},
+ {"makemap", funcTag, 69},
+ {"makemap_small", funcTag, 70},
+ {"mapaccess1", funcTag, 71},
+ {"mapaccess1_fast32", funcTag, 72},
+ {"mapaccess1_fast64", funcTag, 72},
+ {"mapaccess1_faststr", funcTag, 72},
+ {"mapaccess1_fat", funcTag, 73},
+ {"mapaccess2", funcTag, 74},
+ {"mapaccess2_fast32", funcTag, 75},
+ {"mapaccess2_fast64", funcTag, 75},
+ {"mapaccess2_faststr", funcTag, 75},
+ {"mapaccess2_fat", funcTag, 76},
+ {"mapassign", funcTag, 71},
+ {"mapassign_fast32", funcTag, 72},
+ {"mapassign_fast32ptr", funcTag, 72},
+ {"mapassign_fast64", funcTag, 72},
+ {"mapassign_fast64ptr", funcTag, 72},
+ {"mapassign_faststr", funcTag, 72},
+ {"mapiterinit", funcTag, 77},
+ {"mapdelete", funcTag, 77},
+ {"mapdelete_fast32", funcTag, 78},
+ {"mapdelete_fast64", funcTag, 78},
+ {"mapdelete_faststr", funcTag, 78},
+ {"mapiternext", funcTag, 79},
+ {"mapclear", funcTag, 80},
+ {"makechan64", funcTag, 82},
+ {"makechan", funcTag, 83},
+ {"chanrecv1", funcTag, 85},
+ {"chanrecv2", funcTag, 86},
+ {"chansend1", funcTag, 88},
{"closechan", funcTag, 30},
- {"writeBarrier", varTag, 89},
- {"typedmemmove", funcTag, 90},
- {"typedmemclr", funcTag, 91},
- {"typedslicecopy", funcTag, 92},
- {"selectnbsend", funcTag, 93},
- {"selectnbrecv", funcTag, 94},
- {"selectnbrecv2", funcTag, 96},
- {"selectsetpc", funcTag, 97},
- {"selectgo", funcTag, 98},
+ {"writeBarrier", varTag, 90},
+ {"typedmemmove", funcTag, 91},
+ {"typedmemclr", funcTag, 92},
+ {"typedslicecopy", funcTag, 93},
+ {"selectnbsend", funcTag, 94},
+ {"selectnbrecv", funcTag, 95},
+ {"selectnbrecv2", funcTag, 97},
+ {"selectsetpc", funcTag, 98},
+ {"selectgo", funcTag, 99},
{"block", funcTag, 9},
- {"makeslice", funcTag, 99},
- {"makeslice64", funcTag, 100},
- {"makeslicecopy", funcTag, 101},
- {"growslice", funcTag, 103},
- {"memmove", funcTag, 104},
- {"memclrNoHeapPointers", funcTag, 105},
- {"memclrHasPointers", funcTag, 105},
- {"memequal", funcTag, 106},
- {"memequal0", funcTag, 107},
- {"memequal8", funcTag, 107},
- {"memequal16", funcTag, 107},
- {"memequal32", funcTag, 107},
- {"memequal64", funcTag, 107},
- {"memequal128", funcTag, 107},
- {"f32equal", funcTag, 108},
- {"f64equal", funcTag, 108},
- {"c64equal", funcTag, 108},
- {"c128equal", funcTag, 108},
- {"strequal", funcTag, 108},
- {"interequal", funcTag, 108},
- {"nilinterequal", funcTag, 108},
- {"memhash", funcTag, 109},
- {"memhash0", funcTag, 110},
- {"memhash8", funcTag, 110},
- {"memhash16", funcTag, 110},
- {"memhash32", funcTag, 110},
- {"memhash64", funcTag, 110},
- {"memhash128", funcTag, 110},
- {"f32hash", funcTag, 110},
- {"f64hash", funcTag, 110},
- {"c64hash", funcTag, 110},
- {"c128hash", funcTag, 110},
- {"strhash", funcTag, 110},
- {"interhash", funcTag, 110},
- {"nilinterhash", funcTag, 110},
- {"int64div", funcTag, 111},
- {"uint64div", funcTag, 112},
- {"int64mod", funcTag, 111},
- {"uint64mod", funcTag, 112},
- {"float64toint64", funcTag, 113},
- {"float64touint64", funcTag, 114},
- {"float64touint32", funcTag, 115},
- {"int64tofloat64", funcTag, 116},
- {"uint64tofloat64", funcTag, 117},
- {"uint32tofloat64", funcTag, 118},
- {"complex128div", funcTag, 119},
- {"racefuncenter", funcTag, 120},
+ {"makeslice", funcTag, 100},
+ {"makeslice64", funcTag, 101},
+ {"makeslicecopy", funcTag, 102},
+ {"growslice", funcTag, 104},
+ {"memmove", funcTag, 105},
+ {"memclrNoHeapPointers", funcTag, 106},
+ {"memclrHasPointers", funcTag, 106},
+ {"memequal", funcTag, 107},
+ {"memequal0", funcTag, 108},
+ {"memequal8", funcTag, 108},
+ {"memequal16", funcTag, 108},
+ {"memequal32", funcTag, 108},
+ {"memequal64", funcTag, 108},
+ {"memequal128", funcTag, 108},
+ {"f32equal", funcTag, 109},
+ {"f64equal", funcTag, 109},
+ {"c64equal", funcTag, 109},
+ {"c128equal", funcTag, 109},
+ {"strequal", funcTag, 109},
+ {"interequal", funcTag, 109},
+ {"nilinterequal", funcTag, 109},
+ {"memhash", funcTag, 110},
+ {"memhash0", funcTag, 111},
+ {"memhash8", funcTag, 111},
+ {"memhash16", funcTag, 111},
+ {"memhash32", funcTag, 111},
+ {"memhash64", funcTag, 111},
+ {"memhash128", funcTag, 111},
+ {"f32hash", funcTag, 111},
+ {"f64hash", funcTag, 111},
+ {"c64hash", funcTag, 111},
+ {"c128hash", funcTag, 111},
+ {"strhash", funcTag, 111},
+ {"interhash", funcTag, 111},
+ {"nilinterhash", funcTag, 111},
+ {"int64div", funcTag, 112},
+ {"uint64div", funcTag, 113},
+ {"int64mod", funcTag, 112},
+ {"uint64mod", funcTag, 113},
+ {"float64toint64", funcTag, 114},
+ {"float64touint64", funcTag, 115},
+ {"float64touint32", funcTag, 116},
+ {"int64tofloat64", funcTag, 117},
+ {"uint64tofloat64", funcTag, 118},
+ {"uint32tofloat64", funcTag, 119},
+ {"complex128div", funcTag, 120},
+ {"racefuncenter", funcTag, 31},
{"racefuncenterfp", funcTag, 9},
{"racefuncexit", funcTag, 9},
- {"raceread", funcTag, 120},
- {"racewrite", funcTag, 120},
+ {"raceread", funcTag, 31},
+ {"racewrite", funcTag, 31},
{"racereadrange", funcTag, 121},
{"racewriterange", funcTag, 121},
{"msanread", funcTag, 121},
@@ -233,96 +234,96 @@ func runtimeTypes() []*types.Type {
typs[28] = types.Types[TSTRING]
typs[29] = functype(nil, []*Node{anonfield(typs[28])}, nil)
typs[30] = functype(nil, []*Node{anonfield(typs[2])}, nil)
- typs[31] = types.NewArray(typs[0], 32)
- typs[32] = types.NewPtr(typs[31])
- typs[33] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
- typs[34] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
- typs[35] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
- typs[36] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
- typs[37] = types.NewSlice(typs[28])
- typs[38] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[37])}, []*Node{anonfield(typs[28])})
- typs[39] = functype(nil, []*Node{anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[15])})
- typs[40] = types.NewArray(typs[0], 4)
- typs[41] = types.NewPtr(typs[40])
- typs[42] = functype(nil, []*Node{anonfield(typs[41]), anonfield(typs[22])}, []*Node{anonfield(typs[28])})
- typs[43] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[28])})
- typs[44] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[28])})
- typs[45] = types.Runetype
- typs[46] = types.NewSlice(typs[45])
- typs[47] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[46])}, []*Node{anonfield(typs[28])})
- typs[48] = types.NewSlice(typs[0])
- typs[49] = functype(nil, []*Node{anonfield(typs[32]), anonfield(typs[28])}, []*Node{anonfield(typs[48])})
- typs[50] = types.NewArray(typs[45], 32)
- typs[51] = types.NewPtr(typs[50])
- typs[52] = functype(nil, []*Node{anonfield(typs[51]), anonfield(typs[28])}, []*Node{anonfield(typs[46])})
- typs[53] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[5])}, []*Node{anonfield(typs[15])})
- typs[54] = functype(nil, []*Node{anonfield(typs[28]), anonfield(typs[15])}, []*Node{anonfield(typs[45]), anonfield(typs[15])})
- typs[55] = functype(nil, []*Node{anonfield(typs[28])}, []*Node{anonfield(typs[15])})
- typs[56] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2])})
- typs[57] = functype(nil, []*Node{anonfield(typs[2])}, []*Node{anonfield(typs[7])})
- typs[58] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, []*Node{anonfield(typs[2])})
- typs[59] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2]), anonfield(typs[6])})
- typs[60] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[1])}, nil)
- typs[61] = functype(nil, []*Node{anonfield(typs[1])}, nil)
- typs[62] = types.NewPtr(typs[5])
- typs[63] = functype(nil, []*Node{anonfield(typs[62]), anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
- typs[64] = types.Types[TUINT32]
- typs[65] = functype(nil, nil, []*Node{anonfield(typs[64])})
- typs[66] = types.NewMap(typs[2], typs[2])
- typs[67] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[3])}, []*Node{anonfield(typs[66])})
- typs[68] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[3])}, []*Node{anonfield(typs[66])})
- typs[69] = functype(nil, nil, []*Node{anonfield(typs[66])})
- typs[70] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[3])}, []*Node{anonfield(typs[3])})
- typs[71] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[2])}, []*Node{anonfield(typs[3])})
- typs[72] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3])})
- typs[73] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[3])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
- typs[74] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[2])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
- typs[75] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
- typs[76] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[3])}, nil)
- typs[77] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66]), anonfield(typs[2])}, nil)
- typs[78] = functype(nil, []*Node{anonfield(typs[3])}, nil)
- typs[79] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[66])}, nil)
- typs[80] = types.NewChan(typs[2], types.Cboth)
- typs[81] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22])}, []*Node{anonfield(typs[80])})
- typs[82] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[80])})
- typs[83] = types.NewChan(typs[2], types.Crecv)
- typs[84] = functype(nil, []*Node{anonfield(typs[83]), anonfield(typs[3])}, nil)
- typs[85] = functype(nil, []*Node{anonfield(typs[83]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
- typs[86] = types.NewChan(typs[2], types.Csend)
- typs[87] = functype(nil, []*Node{anonfield(typs[86]), anonfield(typs[3])}, nil)
- typs[88] = types.NewArray(typs[0], 3)
- typs[89] = tostruct([]*Node{namedfield("enabled", typs[6]), namedfield("pad", typs[88]), namedfield("needed", typs[6]), namedfield("cgo", typs[6]), namedfield("alignme", typs[24])})
- typs[90] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[3])}, nil)
- typs[91] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, nil)
- typs[92] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[3]), anonfield(typs[15])}, []*Node{anonfield(typs[15])})
- typs[93] = functype(nil, []*Node{anonfield(typs[86]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
- typs[94] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[83])}, []*Node{anonfield(typs[6])})
- typs[95] = types.NewPtr(typs[6])
- typs[96] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[95]), anonfield(typs[83])}, []*Node{anonfield(typs[6])})
- typs[97] = functype(nil, []*Node{anonfield(typs[62])}, nil)
- typs[98] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[62]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[6])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
- typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])})
- typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])})
- typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])})
- typs[102] = types.NewSlice(typs[2])
- typs[103] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[102]), anonfield(typs[15])}, []*Node{anonfield(typs[102])})
- typs[104] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil)
- typs[105] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil)
- typs[106] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])})
- typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
- typs[108] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
- typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
- typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
- typs[111] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])})
- typs[112] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])})
- typs[113] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])})
- typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])})
- typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[64])})
- typs[116] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])})
- typs[117] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])})
- typs[118] = functype(nil, []*Node{anonfield(typs[64])}, []*Node{anonfield(typs[20])})
- typs[119] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])})
- typs[120] = functype(nil, []*Node{anonfield(typs[5])}, nil)
+ typs[31] = functype(nil, []*Node{anonfield(typs[5])}, nil)
+ typs[32] = types.NewArray(typs[0], 32)
+ typs[33] = types.NewPtr(typs[32])
+ typs[34] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
+ typs[35] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
+ typs[36] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
+ typs[37] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[28])})
+ typs[38] = types.NewSlice(typs[28])
+ typs[39] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[38])}, []*Node{anonfield(typs[28])})
+ typs[40] = functype(nil, []*Node{anonfield(typs[28]), anonfield(typs[28])}, []*Node{anonfield(typs[15])})
+ typs[41] = types.NewArray(typs[0], 4)
+ typs[42] = types.NewPtr(typs[41])
+ typs[43] = functype(nil, []*Node{anonfield(typs[42]), anonfield(typs[22])}, []*Node{anonfield(typs[28])})
+ typs[44] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[28])})
+ typs[45] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[28])})
+ typs[46] = types.Runetype
+ typs[47] = types.NewSlice(typs[46])
+ typs[48] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[47])}, []*Node{anonfield(typs[28])})
+ typs[49] = types.NewSlice(typs[0])
+ typs[50] = functype(nil, []*Node{anonfield(typs[33]), anonfield(typs[28])}, []*Node{anonfield(typs[49])})
+ typs[51] = types.NewArray(typs[46], 32)
+ typs[52] = types.NewPtr(typs[51])
+ typs[53] = functype(nil, []*Node{anonfield(typs[52]), anonfield(typs[28])}, []*Node{anonfield(typs[47])})
+ typs[54] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[5])}, []*Node{anonfield(typs[15])})
+ typs[55] = functype(nil, []*Node{anonfield(typs[28]), anonfield(typs[15])}, []*Node{anonfield(typs[46]), anonfield(typs[15])})
+ typs[56] = functype(nil, []*Node{anonfield(typs[28])}, []*Node{anonfield(typs[15])})
+ typs[57] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2])})
+ typs[58] = functype(nil, []*Node{anonfield(typs[2])}, []*Node{anonfield(typs[7])})
+ typs[59] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, []*Node{anonfield(typs[2])})
+ typs[60] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[2])}, []*Node{anonfield(typs[2]), anonfield(typs[6])})
+ typs[61] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[1])}, nil)
+ typs[62] = functype(nil, []*Node{anonfield(typs[1])}, nil)
+ typs[63] = types.NewPtr(typs[5])
+ typs[64] = functype(nil, []*Node{anonfield(typs[63]), anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
+ typs[65] = types.Types[TUINT32]
+ typs[66] = functype(nil, nil, []*Node{anonfield(typs[65])})
+ typs[67] = types.NewMap(typs[2], typs[2])
+ typs[68] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[3])}, []*Node{anonfield(typs[67])})
+ typs[69] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[3])}, []*Node{anonfield(typs[67])})
+ typs[70] = functype(nil, nil, []*Node{anonfield(typs[67])})
+ typs[71] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[3])}, []*Node{anonfield(typs[3])})
+ typs[72] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[2])}, []*Node{anonfield(typs[3])})
+ typs[73] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3])})
+ typs[74] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[3])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
+ typs[75] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[2])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
+ typs[76] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[3]), anonfield(typs[1])}, []*Node{anonfield(typs[3]), anonfield(typs[6])})
+ typs[77] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[3])}, nil)
+ typs[78] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67]), anonfield(typs[2])}, nil)
+ typs[79] = functype(nil, []*Node{anonfield(typs[3])}, nil)
+ typs[80] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[67])}, nil)
+ typs[81] = types.NewChan(typs[2], types.Cboth)
+ typs[82] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22])}, []*Node{anonfield(typs[81])})
+ typs[83] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15])}, []*Node{anonfield(typs[81])})
+ typs[84] = types.NewChan(typs[2], types.Crecv)
+ typs[85] = functype(nil, []*Node{anonfield(typs[84]), anonfield(typs[3])}, nil)
+ typs[86] = functype(nil, []*Node{anonfield(typs[84]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
+ typs[87] = types.NewChan(typs[2], types.Csend)
+ typs[88] = functype(nil, []*Node{anonfield(typs[87]), anonfield(typs[3])}, nil)
+ typs[89] = types.NewArray(typs[0], 3)
+ typs[90] = tostruct([]*Node{namedfield("enabled", typs[6]), namedfield("pad", typs[89]), namedfield("needed", typs[6]), namedfield("cgo", typs[6]), namedfield("alignme", typs[24])})
+ typs[91] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[3])}, nil)
+ typs[92] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3])}, nil)
+ typs[93] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[3]), anonfield(typs[15]), anonfield(typs[3]), anonfield(typs[15])}, []*Node{anonfield(typs[15])})
+ typs[94] = functype(nil, []*Node{anonfield(typs[87]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
+ typs[95] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[84])}, []*Node{anonfield(typs[6])})
+ typs[96] = types.NewPtr(typs[6])
+ typs[97] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[96]), anonfield(typs[84])}, []*Node{anonfield(typs[6])})
+ typs[98] = functype(nil, []*Node{anonfield(typs[63])}, nil)
+ typs[99] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[1]), anonfield(typs[63]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[6])}, []*Node{anonfield(typs[15]), anonfield(typs[6])})
+ typs[100] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15])}, []*Node{anonfield(typs[7])})
+ typs[101] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[7])})
+ typs[102] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[15]), anonfield(typs[15]), anonfield(typs[7])}, []*Node{anonfield(typs[7])})
+ typs[103] = types.NewSlice(typs[2])
+ typs[104] = functype(nil, []*Node{anonfield(typs[1]), anonfield(typs[103]), anonfield(typs[15])}, []*Node{anonfield(typs[103])})
+ typs[105] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, nil)
+ typs[106] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, nil)
+ typs[107] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3]), anonfield(typs[5])}, []*Node{anonfield(typs[6])})
+ typs[108] = functype(nil, []*Node{anonfield(typs[3]), anonfield(typs[3])}, []*Node{anonfield(typs[6])})
+ typs[109] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[7])}, []*Node{anonfield(typs[6])})
+ typs[110] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
+ typs[111] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[5])}, []*Node{anonfield(typs[5])})
+ typs[112] = functype(nil, []*Node{anonfield(typs[22]), anonfield(typs[22])}, []*Node{anonfield(typs[22])})
+ typs[113] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, []*Node{anonfield(typs[24])})
+ typs[114] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[22])})
+ typs[115] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[24])})
+ typs[116] = functype(nil, []*Node{anonfield(typs[20])}, []*Node{anonfield(typs[65])})
+ typs[117] = functype(nil, []*Node{anonfield(typs[22])}, []*Node{anonfield(typs[20])})
+ typs[118] = functype(nil, []*Node{anonfield(typs[24])}, []*Node{anonfield(typs[20])})
+ typs[119] = functype(nil, []*Node{anonfield(typs[65])}, []*Node{anonfield(typs[20])})
+ typs[120] = functype(nil, []*Node{anonfield(typs[26]), anonfield(typs[26])}, []*Node{anonfield(typs[26])})
typs[121] = functype(nil, []*Node{anonfield(typs[5]), anonfield(typs[5])}, nil)
typs[122] = functype(nil, []*Node{anonfield(typs[7]), anonfield(typs[1]), anonfield(typs[5])}, nil)
typs[123] = types.NewSlice(typs[7])
@@ -331,7 +332,7 @@ func runtimeTypes() []*types.Type {
typs[126] = functype(nil, []*Node{anonfield(typs[125]), anonfield(typs[125])}, nil)
typs[127] = types.Types[TUINT16]
typs[128] = functype(nil, []*Node{anonfield(typs[127]), anonfield(typs[127])}, nil)
- typs[129] = functype(nil, []*Node{anonfield(typs[64]), anonfield(typs[64])}, nil)
+ typs[129] = functype(nil, []*Node{anonfield(typs[65]), anonfield(typs[65])}, nil)
typs[130] = functype(nil, []*Node{anonfield(typs[24]), anonfield(typs[24])}, nil)
return typs[:]
}
diff --git a/src/cmd/compile/internal/gc/builtin/runtime.go b/src/cmd/compile/internal/gc/builtin/runtime.go
index 02d6c7b7f5d..aac2de38c69 100644
--- a/src/cmd/compile/internal/gc/builtin/runtime.go
+++ b/src/cmd/compile/internal/gc/builtin/runtime.go
@@ -54,6 +54,7 @@ func printuint(uint64)
func printcomplex(complex128)
func printstring(string)
func printpointer(any)
+func printuintptr(uintptr)
func printiface(any)
func printeface(any)
func printslice(any)
diff --git a/src/cmd/compile/internal/gc/embed.go b/src/cmd/compile/internal/gc/embed.go
new file mode 100644
index 00000000000..103949c1f9f
--- /dev/null
+++ b/src/cmd/compile/internal/gc/embed.go
@@ -0,0 +1,273 @@
+// Copyright 2020 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package gc
+
+import (
+ "cmd/compile/internal/syntax"
+ "cmd/compile/internal/types"
+ "cmd/internal/obj"
+ "encoding/json"
+ "io/ioutil"
+ "log"
+ "path"
+ "sort"
+ "strconv"
+ "strings"
+)
+
+var embedlist []*Node
+
+var embedCfg struct {
+ Patterns map[string][]string
+ Files map[string]string
+}
+
+func readEmbedCfg(file string) {
+ data, err := ioutil.ReadFile(file)
+ if err != nil {
+ log.Fatalf("-embedcfg: %v", err)
+ }
+ if err := json.Unmarshal(data, &embedCfg); err != nil {
+ log.Fatalf("%s: %v", file, err)
+ }
+ if embedCfg.Patterns == nil {
+ log.Fatalf("%s: invalid embedcfg: missing Patterns", file)
+ }
+ if embedCfg.Files == nil {
+ log.Fatalf("%s: invalid embedcfg: missing Files", file)
+ }
+}
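For reference, the struct above implies that the -embedcfg file is JSON carrying exactly these two maps: Patterns, from each //go:embed pattern to the files it matched, and Files, from each file name to its location on disk. A minimal sketch of decoding such a file outside the compiler, with invented paths:

	package main

	import (
		"encoding/json"
		"fmt"
		"log"
	)

	// Same shape as the compiler's embedCfg struct above.
	type embedCfg struct {
		Patterns map[string][]string
		Files    map[string]string
	}

	func main() {
		// Hypothetical contents of a file passed to the compiler via -embedcfg.
		data := []byte(`{
			"Patterns": {"static/*": ["static/index.html"]},
			"Files":    {"static/index.html": "/tmp/go-build/b001/static/index.html"}
		}`)

		var cfg embedCfg
		if err := json.Unmarshal(data, &cfg); err != nil {
			log.Fatalf("embedcfg: %v", err)
		}
		fmt.Println(cfg.Patterns["static/*"], "->", cfg.Files["static/index.html"])
	}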
+
+const (
+ embedUnknown = iota
+ embedBytes
+ embedString
+ embedFiles
+)
+
+var numLocalEmbed int
+
+func varEmbed(p *noder, names []*Node, typ *Node, exprs []*Node, embeds []PragmaEmbed) (newExprs []*Node) {
+ haveEmbed := false
+ for _, decl := range p.file.DeclList {
+ imp, ok := decl.(*syntax.ImportDecl)
+ if !ok {
+ // imports always come first
+ break
+ }
+ path, _ := strconv.Unquote(imp.Path.Value)
+ if path == "embed" {
+ haveEmbed = true
+ break
+ }
+ }
+
+ pos := embeds[0].Pos
+ if !haveEmbed {
+ p.yyerrorpos(pos, "invalid go:embed: missing import \"embed\"")
+ return exprs
+ }
+ if embedCfg.Patterns == nil {
+ p.yyerrorpos(pos, "invalid go:embed: build system did not supply embed configuration")
+ return exprs
+ }
+ if len(names) > 1 {
+ p.yyerrorpos(pos, "go:embed cannot apply to multiple vars")
+ return exprs
+ }
+ if len(exprs) > 0 {
+ p.yyerrorpos(pos, "go:embed cannot apply to var with initializer")
+ return exprs
+ }
+ if typ == nil {
+ // Should not happen, since len(exprs) == 0 now.
+ p.yyerrorpos(pos, "go:embed cannot apply to var without type")
+ return exprs
+ }
+
+ kind := embedKindApprox(typ)
+ if kind == embedUnknown {
+ p.yyerrorpos(pos, "go:embed cannot apply to var of type %v", typ)
+ return exprs
+ }
+
+ // Build list of files to store.
+ have := make(map[string]bool)
+ var list []string
+ for _, e := range embeds {
+ for _, pattern := range e.Patterns {
+ files, ok := embedCfg.Patterns[pattern]
+ if !ok {
+ p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map pattern: %s", pattern)
+ }
+ for _, file := range files {
+ if embedCfg.Files[file] == "" {
+ p.yyerrorpos(e.Pos, "invalid go:embed: build system did not map file: %s", file)
+ continue
+ }
+ if !have[file] {
+ have[file] = true
+ list = append(list, file)
+ }
+ if kind == embedFiles {
+ for dir := path.Dir(file); dir != "." && !have[dir]; dir = path.Dir(dir) {
+ have[dir] = true
+ list = append(list, dir+"/")
+ }
+ }
+ }
+ }
+ }
+ sort.Slice(list, func(i, j int) bool {
+ return embedFileLess(list[i], list[j])
+ })
+
+ if kind == embedString || kind == embedBytes {
+ if len(list) > 1 {
+ p.yyerrorpos(pos, "invalid go:embed: multiple files for type %v", typ)
+ return exprs
+ }
+ }
+
+ v := names[0]
+ if dclcontext != PEXTERN {
+ numLocalEmbed++
+ v = newnamel(v.Pos, lookupN("embed.", numLocalEmbed))
+ v.Sym.Def = asTypesNode(v)
+ v.Name.Param.Ntype = typ
+ v.SetClass(PEXTERN)
+ externdcl = append(externdcl, v)
+ exprs = []*Node{v}
+ }
+
+ v.Name.Param.SetEmbedFiles(list)
+ embedlist = append(embedlist, v)
+ return exprs
+}
+
+// embedKindApprox determines the kind of embedding variable, approximately.
+// The match is approximate because we haven't done scope resolution yet and
+// can't tell whether "string" and "byte" really mean "string" and "byte".
+// The result must be confirmed later, after type checking, using embedKind.
+func embedKindApprox(typ *Node) int {
+ if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
+ return embedFiles
+ }
+ // These are not guaranteed to match only string and []byte -
+ // maybe the local package has redefined one of those words.
+ // But it's the best we can do now during the noder.
+ // The stricter check happens later, in initEmbed calling embedKind.
+ if typ.Sym != nil && typ.Sym.Name == "string" && typ.Sym.Pkg == localpkg {
+ return embedString
+ }
+ if typ.Op == OTARRAY && typ.Left == nil && typ.Right.Sym != nil && typ.Right.Sym.Name == "byte" && typ.Right.Sym.Pkg == localpkg {
+ return embedBytes
+ }
+ return embedUnknown
+}
+
+// embedKind determines the kind of embedding variable.
+func embedKind(typ *types.Type) int {
+ if typ.Sym != nil && typ.Sym.Name == "FS" && (typ.Sym.Pkg.Path == "embed" || (typ.Sym.Pkg == localpkg && myimportpath == "embed")) {
+ return embedFiles
+ }
+ if typ == types.Types[TSTRING] {
+ return embedString
+ }
+ if typ.Sym == nil && typ.IsSlice() && typ.Elem() == types.Bytetype {
+ return embedBytes
+ }
+ return embedUnknown
+}
+
+func embedFileNameSplit(name string) (dir, elem string, isDir bool) {
+ if name[len(name)-1] == '/' {
+ isDir = true
+ name = name[:len(name)-1]
+ }
+ i := len(name) - 1
+ for i >= 0 && name[i] != '/' {
+ i--
+ }
+ if i < 0 {
+ return ".", name, isDir
+ }
+ return name[:i], name[i+1:], isDir
+}
+
+// embedFileLess implements the sort order for a list of embedded files.
+// See the comment inside ../../../../embed/embed.go's Files struct for rationale.
+func embedFileLess(x, y string) bool {
+ xdir, xelem, _ := embedFileNameSplit(x)
+ ydir, yelem, _ := embedFileNameSplit(y)
+ return xdir < ydir || xdir == ydir && xelem < yelem
+}
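To make the ordering concrete, here is a self-contained sketch that copies the two helpers above and sorts an invented file set; comparing on (dir, elem) means a directory's entry (with its trailing slash) sorts before everything contained in it:

	package main

	import (
		"fmt"
		"sort"
	)

	// Copies of embedFileNameSplit and embedFileLess above, for illustration only.
	func embedFileNameSplit(name string) (dir, elem string, isDir bool) {
		if name[len(name)-1] == '/' {
			isDir = true
			name = name[:len(name)-1]
		}
		i := len(name) - 1
		for i >= 0 && name[i] != '/' {
			i--
		}
		if i < 0 {
			return ".", name, isDir
		}
		return name[:i], name[i+1:], isDir
	}

	func embedFileLess(x, y string) bool {
		xdir, xelem, _ := embedFileNameSplit(x)
		ydir, yelem, _ := embedFileNameSplit(y)
		return xdir < ydir || xdir == ydir && xelem < yelem
	}

	func main() {
		// Invented file set: directory entries end in "/", plain files do not.
		list := []string{"static/js/app.js", "static/index.html", "static/", "static/js/"}
		sort.Slice(list, func(i, j int) bool { return embedFileLess(list[i], list[j]) })
		fmt.Println(list)
		// Output: [static/ static/index.html static/js/ static/js/app.js]
	}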
+
+func dumpembeds() {
+ for _, v := range embedlist {
+ initEmbed(v)
+ }
+}
+
+// initEmbed emits the init data for a //go:embed variable,
+// which is either a string, a []byte, or an embed.FS.
+func initEmbed(v *Node) {
+ files := v.Name.Param.EmbedFiles()
+ switch kind := embedKind(v.Type); kind {
+ case embedUnknown:
+ yyerrorl(v.Pos, "go:embed cannot apply to var of type %v", v.Type)
+
+ case embedString, embedBytes:
+ file := files[0]
+ fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], kind == embedString, nil)
+ if err != nil {
+ yyerrorl(v.Pos, "embed %s: %v", file, err)
+ }
+ sym := v.Sym.Linksym()
+ off := 0
+ off = dsymptr(sym, off, fsym, 0) // data string
+ off = duintptr(sym, off, uint64(size)) // len
+ if kind == embedBytes {
+ duintptr(sym, off, uint64(size)) // cap for slice
+ }
+
+ case embedFiles:
+ slicedata := Ctxt.Lookup(`"".` + v.Sym.Name + `.files`)
+ off := 0
+ // []files pointed at by Files
+ off = dsymptr(slicedata, off, slicedata, 3*Widthptr) // []file, pointing just past slice
+ off = duintptr(slicedata, off, uint64(len(files)))
+ off = duintptr(slicedata, off, uint64(len(files)))
+
+ // embed/embed.go type file is:
+ // name string
+ // data string
+ // hash [16]byte
+ // Emit one of these per file in the set.
+ const hashSize = 16
+ hash := make([]byte, hashSize)
+ for _, file := range files {
+ off = dsymptr(slicedata, off, stringsym(v.Pos, file), 0) // file string
+ off = duintptr(slicedata, off, uint64(len(file)))
+ if strings.HasSuffix(file, "/") {
+ // entry for directory - no data
+ off = duintptr(slicedata, off, 0)
+ off = duintptr(slicedata, off, 0)
+ off += hashSize
+ } else {
+ fsym, size, err := fileStringSym(v.Pos, embedCfg.Files[file], true, hash)
+ if err != nil {
+ yyerrorl(v.Pos, "embed %s: %v", file, err)
+ }
+ off = dsymptr(slicedata, off, fsym, 0) // data string
+ off = duintptr(slicedata, off, uint64(size))
+ off = int(slicedata.WriteBytes(Ctxt, int64(off), hash))
+ }
+ }
+ ggloblsym(slicedata, int32(off), obj.RODATA|obj.LOCAL)
+ sym := v.Sym.Linksym()
+ dsymptr(sym, 0, slicedata, 0)
+ }
+}
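For orientation, the records written by initEmbed correspond to the unexported file/FS layout that package embed reads back at run time. A rough Go sketch of that shape, inferred from the comment above rather than copied from embed/embed.go:

	// Sketch of the data initEmbed lays out for an embed.FS variable.
	package sketch

	type file struct {
		name string   // path within the embedded set; directory entries end in "/"
		data string   // file contents; empty for directory entries
		hash [16]byte // content hash; left zero for directory entries
	}

	// The embed.FS variable itself is emitted as a single pointer to a
	// slice of these records, sorted in embedFileLess order.
	type fs struct {
		files *[]file
	}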
diff --git a/src/cmd/compile/internal/gc/fmt.go b/src/cmd/compile/internal/gc/fmt.go
index d7ed1d2ff09..240b09bb6d3 100644
--- a/src/cmd/compile/internal/gc/fmt.go
+++ b/src/cmd/compile/internal/gc/fmt.go
@@ -419,10 +419,17 @@ func (n *Node) format(s fmt.State, verb rune, mode fmtMode) {
func (n *Node) jconv(s fmt.State, flag FmtFlag) {
c := flag & FmtShort
+ // Useful to see which nodes in an AST printout are actually identical
+ fmt.Fprintf(s, " p(%p)", n)
if c == 0 && n.Name != nil && n.Name.Vargen != 0 {
fmt.Fprintf(s, " g(%d)", n.Name.Vargen)
}
+ if c == 0 && n.Name != nil && n.Name.Defn != nil {
+ // Useful to see where Defn is set and what node it points to
+ fmt.Fprintf(s, " defn(%p)", n.Name.Defn)
+ }
+
if n.Pos.IsKnown() {
pfx := ""
switch n.Pos.IsStmt() {
@@ -492,6 +499,15 @@ func (n *Node) jconv(s fmt.State, flag FmtFlag) {
if n.Name.Assigned() {
fmt.Fprint(s, " assigned")
}
+ if n.Name.IsClosureVar() {
+ fmt.Fprint(s, " closurevar")
+ }
+ if n.Name.Captured() {
+ fmt.Fprint(s, " captured")
+ }
+ if n.Name.IsOutputParamHeapAddr() {
+ fmt.Fprint(s, " outputparamheapaddr")
+ }
}
if n.Bounded() {
fmt.Fprint(s, " bounded")
@@ -1710,6 +1726,9 @@ func (n *Node) nodedump(s fmt.State, flag FmtFlag, mode fmtMode) {
}
}
+ if n.Op == OCLOSURE && n.Func.Closure != nil && n.Func.Closure.Func.Nname.Sym != nil {
+ mode.Fprintf(s, " fnName %v", n.Func.Closure.Func.Nname.Sym)
+ }
if n.Sym != nil && n.Op != ONAME {
mode.Fprintf(s, " %v", n.Sym)
}
@@ -1725,6 +1744,16 @@ func (n *Node) nodedump(s fmt.State, flag FmtFlag, mode fmtMode) {
if n.Right != nil {
mode.Fprintf(s, "%v", n.Right)
}
+ if n.Func != nil && n.Func.Closure != nil && n.Func.Closure.Nbody.Len() != 0 {
+ indent(s)
+ // The function associated with a closure
+ mode.Fprintf(s, "%v-clofunc%v", n.Op, n.Func.Closure)
+ }
+ if n.Func != nil && n.Func.Dcl != nil && len(n.Func.Dcl) != 0 {
+ indent(s)
+ // The dcls for a func or closure
+ mode.Fprintf(s, "%v-dcl%v", n.Op, asNodes(n.Func.Dcl))
+ }
if n.List.Len() != 0 {
indent(s)
mode.Fprintf(s, "%v-list%v", n.Op, n.List)
diff --git a/src/cmd/compile/internal/gc/gsubr.go b/src/cmd/compile/internal/gc/gsubr.go
index ce5182f2032..864ada1d3cd 100644
--- a/src/cmd/compile/internal/gc/gsubr.go
+++ b/src/cmd/compile/internal/gc/gsubr.go
@@ -70,12 +70,8 @@ func newProgs(fn *Node, worker int) *Progs {
pp.pos = fn.Pos
pp.settext(fn)
// PCDATA tables implicitly start with index -1.
- pp.prevLive = LivenessIndex{-1, -1, false}
- if go115ReduceLiveness {
- pp.nextLive = pp.prevLive
- } else {
- pp.nextLive = LivenessInvalid
- }
+ pp.prevLive = LivenessIndex{-1, false}
+ pp.nextLive = pp.prevLive
return pp
}
@@ -120,31 +116,15 @@ func (pp *Progs) Prog(as obj.As) *obj.Prog {
Addrconst(&p.From, objabi.PCDATA_StackMapIndex)
Addrconst(&p.To, int64(idx))
}
- if !go115ReduceLiveness {
+ if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
+ // Emit unsafe-point marker.
+ pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
+ p := pp.Prog(obj.APCDATA)
+ Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
if pp.nextLive.isUnsafePoint {
- // Unsafe points are encoded as a special value in the
- // register map.
- pp.nextLive.regMapIndex = objabi.PCDATA_RegMapUnsafe
- }
- if pp.nextLive.regMapIndex != pp.prevLive.regMapIndex {
- // Emit register map index change.
- idx := pp.nextLive.regMapIndex
- pp.prevLive.regMapIndex = idx
- p := pp.Prog(obj.APCDATA)
- Addrconst(&p.From, objabi.PCDATA_RegMapIndex)
- Addrconst(&p.To, int64(idx))
- }
- } else {
- if pp.nextLive.isUnsafePoint != pp.prevLive.isUnsafePoint {
- // Emit unsafe-point marker.
- pp.prevLive.isUnsafePoint = pp.nextLive.isUnsafePoint
- p := pp.Prog(obj.APCDATA)
- Addrconst(&p.From, objabi.PCDATA_UnsafePoint)
- if pp.nextLive.isUnsafePoint {
- Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
- } else {
- Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
- }
+ Addrconst(&p.To, objabi.PCDATA_UnsafePointUnsafe)
+ } else {
+ Addrconst(&p.To, objabi.PCDATA_UnsafePointSafe)
}
}
diff --git a/src/cmd/compile/internal/gc/inl.go b/src/cmd/compile/internal/gc/inl.go
index a2fb00e1323..253036fea64 100644
--- a/src/cmd/compile/internal/gc/inl.go
+++ b/src/cmd/compile/internal/gc/inl.go
@@ -257,21 +257,39 @@ func inlFlood(n *Node) {
typecheckinl(n)
+ // Recursively identify all referenced functions for
+ // reexport. We want to include even non-called functions,
+ // because after inlining they might be callable.
inspectList(asNodes(n.Func.Inl.Body), func(n *Node) bool {
switch n.Op {
case ONAME:
- // Mark any referenced global variables or
- // functions for reexport. Skip methods,
- // because they're reexported alongside their
- // receiver type.
- if n.Class() == PEXTERN || n.Class() == PFUNC && !n.isMethodExpression() {
+ switch n.Class() {
+ case PFUNC:
+ if n.isMethodExpression() {
+ inlFlood(asNode(n.Type.Nname()))
+ } else {
+ inlFlood(n)
+ exportsym(n)
+ }
+ case PEXTERN:
exportsym(n)
}
- case OCALLFUNC, OCALLMETH:
- // Recursively flood any functions called by
- // this one.
- inlFlood(asNode(n.Left.Type.Nname()))
+ case ODOTMETH:
+ fn := asNode(n.Type.Nname())
+ inlFlood(fn)
+
+ case OCALLPART:
+ // Okay, because we don't yet inline indirect
+ // calls to method values.
+ case OCLOSURE:
+ // If the closure is inlinable, we'll need to
+ // flood it too. But today we don't support
+ // inlining functions that contain closures.
+ //
+ // When we do, we'll probably want:
+ // inlFlood(n.Func.Closure.Func.Nname)
+ Fatalf("unexpected closure in inlinable function")
}
return true
})
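As a small illustration of why even non-called references have to be flooded (names invented): once an inlinable exported function is copied into an importing package, every identifier in its body is referenced from that package, so the bodies and symbols it mentions must travel in the export data too.

	package p

	// Small enough to be inlined into importing packages.
	func Scale(x int) int { return x * factor() }

	// Never called directly by importers, but once Scale is inlined there,
	// the inlined body refers to factor, so factor must be re-exported
	// alongside Scale.
	func factor() int { return 3 }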
@@ -574,13 +592,11 @@ func inlnode(n *Node, maxCost int32, inlMap map[*Node]bool) *Node {
}
switch n.Op {
- // inhibit inlining of their argument
case ODEFER, OGO:
switch n.Left.Op {
case OCALLFUNC, OCALLMETH:
n.Left.SetNoInline(true)
}
- return n
// TODO do them here (or earlier),
// so escape analysis can avoid more heapmoves.
@@ -708,7 +724,14 @@ func inlCallee(fn *Node) *Node {
switch {
case fn.Op == ONAME && fn.Class() == PFUNC:
if fn.isMethodExpression() {
- return asNode(fn.Sym.Def)
+ n := asNode(fn.Type.Nname())
+ // Check that receiver type matches fn.Left.
+ // TODO(mdempsky): Handle implicit dereference
+ // of pointer receiver argument?
+ if n == nil || !types.Identical(n.Type.Recv().Type, fn.Left.Type) {
+ return nil
+ }
+ return n
}
return fn
case fn.Op == OCLOSURE:
@@ -721,6 +744,11 @@ func inlCallee(fn *Node) *Node {
func staticValue(n *Node) *Node {
for {
+ if n.Op == OCONVNOP {
+ n = n.Left
+ continue
+ }
+
n1 := staticValue1(n)
if n1 == nil {
return n
@@ -811,14 +839,12 @@ func (v *reassignVisitor) visit(n *Node) *Node {
if n.Left == v.name && n != v.name.Name.Defn {
return n
}
- return nil
case OAS2, OAS2FUNC, OAS2MAPR, OAS2DOTTYPE:
for _, p := range n.List.Slice() {
if p == v.name && n != v.name.Name.Defn {
return n
}
}
- return nil
}
if a := v.visit(n.Left); a != nil {
return a
@@ -1011,15 +1037,28 @@ func mkinlcall(n, fn *Node, maxCost int32, inlMap map[*Node]bool) *Node {
}
}
+ nreturns := 0
+ inspectList(asNodes(fn.Func.Inl.Body), func(n *Node) bool {
+ if n != nil && n.Op == ORETURN {
+ nreturns++
+ }
+ return true
+ })
+
+ // We can delay declaring+initializing result parameters if:
+ // (1) there's only one "return" statement in the inlined
+ // function, and (2) the result parameters aren't named.
+ delayretvars := nreturns == 1
+
// temporaries for return values.
var retvars []*Node
for i, t := range fn.Type.Results().Fields().Slice() {
var m *Node
- mpos := t.Pos
- if n := asNode(t.Nname); n != nil && !n.isBlank() {
+ if n := asNode(t.Nname); n != nil && !n.isBlank() && !strings.HasPrefix(n.Sym.Name, "~r") {
m = inlvar(n)
m = typecheck(m, ctxExpr)
inlvars[n] = m
+ delayretvars = false // found a named result parameter
} else {
// anonymous return values, synthesize names for use in assignment that replaces return
m = retvar(t, i)
@@ -1031,12 +1070,11 @@ func mkinlcall(n, fn *Node, maxCost int32, inlMap map[*Node]bool) *Node {
// were not part of the original callee.
if !strings.HasPrefix(m.Sym.Name, "~R") {
m.Name.SetInlFormal(true)
- m.Pos = mpos
+ m.Pos = t.Pos
inlfvars = append(inlfvars, m)
}
}
- ninit.Append(nod(ODCL, m, nil))
retvars = append(retvars, m)
}
@@ -1097,11 +1135,14 @@ func mkinlcall(n, fn *Node, maxCost int32, inlMap map[*Node]bool) *Node {
ninit.Append(vas)
}
- // Zero the return parameters.
- for _, n := range retvars {
- ras := nod(OAS, n, nil)
- ras = typecheck(ras, ctxStmt)
- ninit.Append(ras)
+ if !delayretvars {
+ // Zero the return parameters.
+ for _, n := range retvars {
+ ninit.Append(nod(ODCL, n, nil))
+ ras := nod(OAS, n, nil)
+ ras = typecheck(ras, ctxStmt)
+ ninit.Append(ras)
+ }
}
retlabel := autolabel(".i")
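A minimal sketch of the case delayretvars targets, under the two conditions stated above (exactly one return statement, results not named); both functions are invented examples:

	package p

	// One return, unnamed result: when this is inlined, the result
	// temporary can be declared and assigned at the return itself
	// instead of being zeroed up front in the call's init list.
	func add(x, y int) int { return x + y }

	// Named result: the temporary may be read or written before the
	// return, so it is still declared and zeroed eagerly.
	func addNamed(x, y int) (sum int) { sum = x + y; return sum }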
@@ -1132,11 +1173,12 @@ func mkinlcall(n, fn *Node, maxCost int32, inlMap map[*Node]bool) *Node {
}
subst := inlsubst{
- retlabel: retlabel,
- retvars: retvars,
- inlvars: inlvars,
- bases: make(map[*src.PosBase]*src.PosBase),
- newInlIndex: newIndex,
+ retlabel: retlabel,
+ retvars: retvars,
+ delayretvars: delayretvars,
+ inlvars: inlvars,
+ bases: make(map[*src.PosBase]*src.PosBase),
+ newInlIndex: newIndex,
}
body := subst.list(asNodes(fn.Func.Inl.Body))
@@ -1232,6 +1274,10 @@ type inlsubst struct {
// Temporary result variables.
retvars []*Node
+ // Whether result variables should be initialized at the
+ // "return" statement.
+ delayretvars bool
+
inlvars map[*Node]*Node
// bases maps from original PosBase to PosBase with an extra
@@ -1300,6 +1346,14 @@ func (subst *inlsubst) node(n *Node) *Node {
as.List.Append(n)
}
as.Rlist.Set(subst.list(n.List))
+
+ if subst.delayretvars {
+ for _, n := range as.List.Slice() {
+ as.Ninit.Append(nod(ODCL, n, nil))
+ n.Name.Defn = as
+ }
+ }
+
as = typecheck(as, ctxStmt)
m.Ninit.Append(as)
}
@@ -1362,3 +1416,68 @@ func pruneUnusedAutos(ll []*Node, vis *hairyVisitor) []*Node {
}
return s
}
+
+// devirtualize replaces interface method calls within fn with direct
+// concrete-type method calls where applicable.
+func devirtualize(fn *Node) {
+ Curfn = fn
+ inspectList(fn.Nbody, func(n *Node) bool {
+ if n.Op == OCALLINTER {
+ devirtualizeCall(n)
+ }
+ return true
+ })
+}
+
+func devirtualizeCall(call *Node) {
+ recv := staticValue(call.Left.Left)
+ if recv.Op != OCONVIFACE {
+ return
+ }
+
+ typ := recv.Left.Type
+ if typ.IsInterface() {
+ return
+ }
+
+ x := nodl(call.Left.Pos, ODOTTYPE, call.Left.Left, nil)
+ x.Type = typ
+ x = nodlSym(call.Left.Pos, OXDOT, x, call.Left.Sym)
+ x = typecheck(x, ctxExpr|ctxCallee)
+ switch x.Op {
+ case ODOTMETH:
+ if Debug.m != 0 {
+ Warnl(call.Pos, "devirtualizing %v to %v", call.Left, typ)
+ }
+ call.Op = OCALLMETH
+ call.Left = x
+ case ODOTINTER:
+ // Promoted method from embedded interface-typed field (#42279).
+ if Debug.m != 0 {
+ Warnl(call.Pos, "partially devirtualizing %v to %v", call.Left, typ)
+ }
+ call.Op = OCALLINTER
+ call.Left = x
+ default:
+ // TODO(mdempsky): Turn back into Fatalf after more testing.
+ if Debug.m != 0 {
+ Warnl(call.Pos, "failed to devirtualize %v (%v)", x, x.Op)
+ }
+ return
+ }
+
+ // Duplicated logic from typecheck for function call return
+ // value types.
+ //
+ // Receiver parameter size may have changed; need to update
+ // call.Type to get correct stack offsets for result
+ // parameters.
+ checkwidth(x.Type)
+ switch ft := x.Type; ft.NumResults() {
+ case 0:
+ case 1:
+ call.Type = ft.Results().Field(0).Type
+ default:
+ call.Type = ft.Results()
+ }
+}
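A small example of the pattern devirtualizeCall rewrites, assuming nothing beyond the logic above: the only value ever stored in the interface variable has a statically known concrete type, so the OCALLINTER can be turned into a direct method call (with -gcflags=-m the Warnl above would be expected to report "devirtualizing"):

	package main

	import "strings"

	type sizer interface{ Len() int }

	func length() int {
		// s is assigned exactly once, from a *strings.Reader,
		// so s.Len() is a devirtualization candidate.
		var s sizer = strings.NewReader("hello")
		return s.Len()
	}

	func main() { println(length()) }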
diff --git a/src/cmd/compile/internal/gc/inl_test.go b/src/cmd/compile/internal/gc/inl_test.go
index afa6b983151..02735e50fb7 100644
--- a/src/cmd/compile/internal/gc/inl_test.go
+++ b/src/cmd/compile/internal/gc/inl_test.go
@@ -51,6 +51,7 @@ func TestIntendedInlining(t *testing.T) {
"funcPC",
"getArgInfoFast",
"getm",
+ "getMCache",
"isDirectIface",
"itabHashFunc",
"noescape",
diff --git a/src/cmd/compile/internal/gc/main.go b/src/cmd/compile/internal/gc/main.go
index 2a2b6bee011..b6e52f482d4 100644
--- a/src/cmd/compile/internal/gc/main.go
+++ b/src/cmd/compile/internal/gc/main.go
@@ -34,8 +34,6 @@ import (
"strings"
)
-var imported_unsafe bool
-
var (
buildid string
spectre string
@@ -241,6 +239,7 @@ func Main(archInit func(*Arch)) {
flag.BoolVar(&flagDWARF, "dwarf", !Wasm, "generate DWARF symbols")
flag.BoolVar(&Ctxt.Flag_locationlists, "dwarflocationlists", true, "add location lists to DWARF in optimized mode")
flag.IntVar(&genDwarfInline, "gendwarfinl", 2, "generate DWARF inline info records")
+ objabi.Flagfn1("embedcfg", "read go:embed configuration from `file`", readEmbedCfg)
objabi.Flagfn1("importmap", "add `definition` of the form source=actual to import map", addImportMap)
objabi.Flagfn1("importcfg", "read import configuration from `file`", readImportCfg)
flag.StringVar(&flag_installsuffix, "installsuffix", "", "set pkg directory `suffix`")
@@ -605,7 +604,7 @@ func Main(archInit func(*Arch)) {
timings.Start("fe", "typecheck", "top1")
for i := 0; i < len(xtop); i++ {
n := xtop[i]
- if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias) {
+ if op := n.Op; op != ODCL && op != OAS && op != OAS2 && (op != ODCLTYPE || !n.Left.Name.Param.Alias()) {
xtop[i] = typecheck(n, ctxStmt)
}
}
@@ -617,7 +616,7 @@ func Main(archInit func(*Arch)) {
timings.Start("fe", "typecheck", "top2")
for i := 0; i < len(xtop); i++ {
n := xtop[i]
- if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias {
+ if op := n.Op; op == ODCL || op == OAS || op == OAS2 || op == ODCLTYPE && n.Left.Name.Param.Alias() {
xtop[i] = typecheck(n, ctxStmt)
}
}
@@ -710,6 +709,13 @@ func Main(archInit func(*Arch)) {
})
}
+ for _, n := range xtop {
+ if n.Op == ODCLFUNC {
+ devirtualize(n)
+ }
+ }
+ Curfn = nil
+
// Phase 6: Escape analysis.
// Required for moving heap allocations onto stack,
// which in turn is required by the closure implementation,
@@ -1185,7 +1191,6 @@ func importfile(f *Val) *types.Pkg {
}
if path_ == "unsafe" {
- imported_unsafe = true
return unsafepkg
}
diff --git a/src/cmd/compile/internal/gc/noder.go b/src/cmd/compile/internal/gc/noder.go
index 9685794ec4f..14bacc14a8b 100644
--- a/src/cmd/compile/internal/gc/noder.go
+++ b/src/cmd/compile/internal/gc/noder.go
@@ -12,6 +12,7 @@ import (
"runtime"
"strconv"
"strings"
+ "unicode"
"unicode/utf8"
"cmd/compile/internal/importer"
@@ -152,7 +153,11 @@ func (p *noder) makeSrcPosBase(b0 *syntax.PosBase) *src.PosBase {
} else {
// line directive base
p0 := b0.Pos()
- p1 := src.MakePos(p.makeSrcPosBase(p0.Base()), p0.Line(), p0.Col())
+ p0b := p0.Base()
+ if p0b == b0 {
+ panic("infinite recursion in makeSrcPosBase")
+ }
+ p1 := src.MakePos(p.makeSrcPosBase(p0b), p0.Line(), p0.Col())
b1 = src.NewLinePragmaBase(p1, fn, fileh(fn), b0.Line(), b0.Col())
}
p.basemap[b0] = b1
@@ -192,11 +197,13 @@ type noder struct {
base *src.PosBase
}
- file *syntax.File
- linknames []linkname
- pragcgobuf [][]string
- err chan syntax.Error
- scope ScopeID
+ file *syntax.File
+ linknames []linkname
+ pragcgobuf [][]string
+ err chan syntax.Error
+ scope ScopeID
+ importedUnsafe bool
+ importedEmbed bool
// scopeVars is a stack tracking the number of variables declared in the
// current function at the moment each open scope was opened.
@@ -298,7 +305,8 @@ type linkname struct {
func (p *noder) node() {
types.Block = 1
- imported_unsafe = false
+ p.importedUnsafe = false
+ p.importedEmbed = false
p.setlineno(p.file.PkgName)
mkpackage(p.file.PkgName.Value)
@@ -311,7 +319,7 @@ func (p *noder) node() {
xtop = append(xtop, p.decls(p.file.DeclList)...)
for _, n := range p.linknames {
- if !imported_unsafe {
+ if !p.importedUnsafe {
p.yyerrorpos(n.pos, "//go:linkname only allowed in Go files that import \"unsafe\"")
continue
}
@@ -386,7 +394,6 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) {
val := p.basicLit(imp.Path)
ipkg := importfile(&val)
-
if ipkg == nil {
if nerrors == 0 {
Fatalf("phase error in import")
@@ -394,6 +401,13 @@ func (p *noder) importDecl(imp *syntax.ImportDecl) {
return
}
+ if ipkg == unsafepkg {
+ p.importedUnsafe = true
+ }
+ if ipkg.Path == "embed" {
+ p.importedEmbed = true
+ }
+
ipkg.Direct = true
var my *types.Sym
@@ -435,6 +449,20 @@ func (p *noder) varDecl(decl *syntax.VarDecl) []*Node {
}
if pragma, ok := decl.Pragma.(*Pragma); ok {
+ if len(pragma.Embeds) > 0 {
+ if !p.importedEmbed {
+ // This check can't be done when building the list pragma.Embeds
+ // because that list is created before the noder starts walking over the file,
+ // so at that point it hasn't seen the imports.
+ // We're left to check now, just before applying the //go:embed lines.
+ for _, e := range pragma.Embeds {
+ p.yyerrorpos(e.Pos, "//go:embed only allowed in Go files that import \"embed\"")
+ }
+ } else {
+ exprs = varEmbed(p, names, typ, exprs, pragma.Embeds)
+ }
+ pragma.Embeds = nil
+ }
p.checkUnused(pragma)
}
@@ -517,17 +545,17 @@ func (p *noder) typeDecl(decl *syntax.TypeDecl) *Node {
param := n.Name.Param
param.Ntype = typ
- param.Alias = decl.Alias
+ param.SetAlias(decl.Alias)
if pragma, ok := decl.Pragma.(*Pragma); ok {
if !decl.Alias {
- param.Pragma = pragma.Flag & TypePragmas
+ param.SetPragma(pragma.Flag & TypePragmas)
pragma.Flag &^= TypePragmas
}
p.checkUnused(pragma)
}
nod := p.nod(decl, ODCLTYPE, n, nil)
- if param.Alias && !langSupported(1, 9, localpkg) {
+ if param.Alias() && !langSupported(1, 9, localpkg) {
yyerrorl(nod.Pos, "type aliases only supported as of -lang=go1.9")
}
return nod
@@ -1555,13 +1583,15 @@ var allowedStdPragmas = map[string]bool{
"go:cgo_import_dynamic": true,
"go:cgo_ldflag": true,
"go:cgo_dynamic_linker": true,
+ "go:embed": true,
"go:generate": true,
}
// *Pragma is the value stored in a syntax.Pragma during parsing.
type Pragma struct {
- Flag PragmaFlag // collected bits
- Pos []PragmaPos // position of each individual flag
+ Flag PragmaFlag // collected bits
+ Pos []PragmaPos // position of each individual flag
+ Embeds []PragmaEmbed
}
type PragmaPos struct {
@@ -1569,12 +1599,22 @@ type PragmaPos struct {
Pos syntax.Pos
}
+type PragmaEmbed struct {
+ Pos syntax.Pos
+ Patterns []string
+}
+
func (p *noder) checkUnused(pragma *Pragma) {
for _, pos := range pragma.Pos {
if pos.Flag&pragma.Flag != 0 {
p.yyerrorpos(pos.Pos, "misplaced compiler directive")
}
}
+ if len(pragma.Embeds) > 0 {
+ for _, e := range pragma.Embeds {
+ p.yyerrorpos(e.Pos, "misplaced go:embed directive")
+ }
+ }
}
func (p *noder) checkUnusedDuringParse(pragma *Pragma) {
@@ -1583,6 +1623,11 @@ func (p *noder) checkUnusedDuringParse(pragma *Pragma) {
p.error(syntax.Error{Pos: pos.Pos, Msg: "misplaced compiler directive"})
}
}
+ if len(pragma.Embeds) > 0 {
+ for _, e := range pragma.Embeds {
+ p.error(syntax.Error{Pos: e.Pos, Msg: "misplaced go:embed directive"})
+ }
+ }
}
// pragma is called concurrently if files are parsed concurrently.
@@ -1627,6 +1672,17 @@ func (p *noder) pragma(pos syntax.Pos, blankLine bool, text string, old syntax.P
}
p.linknames = append(p.linknames, linkname{pos, f[1], target})
+ case text == "go:embed", strings.HasPrefix(text, "go:embed "):
+ args, err := parseGoEmbed(text[len("go:embed"):])
+ if err != nil {
+ p.error(syntax.Error{Pos: pos, Msg: err.Error()})
+ }
+ if len(args) == 0 {
+ p.error(syntax.Error{Pos: pos, Msg: "usage: //go:embed pattern..."})
+ break
+ }
+ pragma.Embeds = append(pragma.Embeds, PragmaEmbed{pos, args})
+
case strings.HasPrefix(text, "go:cgo_import_dynamic "):
// This is permitted for general use because Solaris
// code relies on it in golang.org/x/sys/unix and others.
@@ -1699,3 +1755,64 @@ func mkname(sym *types.Sym) *Node {
}
return n
}
+
+// parseGoEmbed parses the text following "//go:embed" to extract the glob patterns.
+// It accepts unquoted space-separated patterns as well as double-quoted and back-quoted Go strings.
+// go/build/read.go also processes these strings and contains similar logic.
+func parseGoEmbed(args string) ([]string, error) {
+ var list []string
+ for args = strings.TrimSpace(args); args != ""; args = strings.TrimSpace(args) {
+ var path string
+ Switch:
+ switch args[0] {
+ default:
+ i := len(args)
+ for j, c := range args {
+ if unicode.IsSpace(c) {
+ i = j
+ break
+ }
+ }
+ path = args[:i]
+ args = args[i:]
+
+ case '`':
+ i := strings.Index(args[1:], "`")
+ if i < 0 {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ path = args[1 : 1+i]
+ args = args[1+i+1:]
+
+ case '"':
+ i := 1
+ for ; i < len(args); i++ {
+ if args[i] == '\\' {
+ i++
+ continue
+ }
+ if args[i] == '"' {
+ q, err := strconv.Unquote(args[:i+1])
+ if err != nil {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args[:i+1])
+ }
+ path = q
+ args = args[i+1:]
+ break Switch
+ }
+ }
+ if i >= len(args) {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ }
+
+ if args != "" {
+ r, _ := utf8.DecodeRuneInString(args)
+ if !unicode.IsSpace(r) {
+ return nil, fmt.Errorf("invalid quoted string in //go:embed: %s", args)
+ }
+ }
+ list = append(list, path)
+ }
+ return list, nil
+}
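The three argument forms handled above, shown as source directives (patterns invented; each must still match at least one file when the package is built):

	package p

	import _ "embed" // a blank import satisfies the "embed" requirement for string/[]byte variables

	//go:embed version.txt
	var version string // bare, space-separated pattern

	//go:embed "file with spaces.txt"
	var spaced string // double-quoted Go string, so spaces are allowed

	//go:embed `raw name.txt`
	var raw []byte // back-quoted Go string, no escape processing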
diff --git a/src/cmd/compile/internal/gc/obj.go b/src/cmd/compile/internal/gc/obj.go
index f6557e2d158..32aa7c5bb1a 100644
--- a/src/cmd/compile/internal/gc/obj.go
+++ b/src/cmd/compile/internal/gc/obj.go
@@ -14,6 +14,8 @@ import (
"encoding/json"
"fmt"
"io"
+ "io/ioutil"
+ "os"
"sort"
"strconv"
)
@@ -125,6 +127,7 @@ func dumpdata() {
itabsLen := len(itabs)
dumpimportstrings()
dumpbasictypes()
+ dumpembeds()
// Calls to dumpsignats can generate functions,
// like method wrappers and hash and equality routines.
@@ -309,7 +312,7 @@ func addGCLocals() {
if fn == nil {
continue
}
- for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals, fn.GCRegs} {
+ for _, gcsym := range []*obj.LSym{fn.GCArgs, fn.GCLocals} {
if gcsym != nil && !gcsym.OnList() {
ggloblsym(gcsym, int32(len(gcsym.P)), obj.RODATA|obj.DUPOK)
}
@@ -358,28 +361,31 @@ func dbvec(s *obj.LSym, off int, bv bvec) int {
return off
}
+const (
+ stringSymPrefix = "go.string."
+ stringSymPattern = ".gostring.%d.%x"
+)
+
+// stringsym returns a symbol containing the string s.
+// The symbol contains the string data, not a string header.
func stringsym(pos src.XPos, s string) (data *obj.LSym) {
var symname string
if len(s) > 100 {
// Huge strings are hashed to avoid long names in object files.
// Indulge in some paranoia by writing the length of s, too,
// as protection against length extension attacks.
+ // Same pattern is known to fileStringSym below.
h := sha256.New()
io.WriteString(h, s)
- symname = fmt.Sprintf(".gostring.%d.%x", len(s), h.Sum(nil))
+ symname = fmt.Sprintf(stringSymPattern, len(s), h.Sum(nil))
} else {
// Small strings get named directly by their contents.
symname = strconv.Quote(s)
}
- const prefix = "go.string."
- symdataname := prefix + symname
-
- symdata := Ctxt.Lookup(symdataname)
-
+ symdata := Ctxt.Lookup(stringSymPrefix + symname)
if !symdata.OnList() {
- // string data
- off := dsname(symdata, 0, s, pos, "string")
+ off := dstringdata(symdata, 0, s, pos, "string")
ggloblsym(symdata, int32(off), obj.DUPOK|obj.RODATA|obj.LOCAL)
symdata.Set(obj.AttrContentAddressable, true)
}
@@ -387,26 +393,122 @@ func stringsym(pos src.XPos, s string) (data *obj.LSym) {
return symdata
}
-var slicebytes_gen int
+// fileStringSym returns a symbol for the contents and the size of file.
+// If readonly is true, the symbol shares storage with any literal string
+// or other file with the same content and is placed in a read-only section.
+// If readonly is false, the symbol is a read-write copy separate from any other,
+// for use as the backing store of a []byte.
+// The content hash of file is copied into hash. (If hash is nil, nothing is copied.)
+// The returned symbol contains the data itself, not a string header.
+func fileStringSym(pos src.XPos, file string, readonly bool, hash []byte) (*obj.LSym, int64, error) {
+ f, err := os.Open(file)
+ if err != nil {
+ return nil, 0, err
+ }
+ defer f.Close()
+ info, err := f.Stat()
+ if err != nil {
+ return nil, 0, err
+ }
+ if !info.Mode().IsRegular() {
+ return nil, 0, fmt.Errorf("not a regular file")
+ }
+ size := info.Size()
+ if size <= 1*1024 {
+ data, err := ioutil.ReadAll(f)
+ if err != nil {
+ return nil, 0, err
+ }
+ if int64(len(data)) != size {
+ return nil, 0, fmt.Errorf("file changed between reads")
+ }
+ var sym *obj.LSym
+ if readonly {
+ sym = stringsym(pos, string(data))
+ } else {
+ sym = slicedata(pos, string(data)).Sym.Linksym()
+ }
+ if len(hash) > 0 {
+ sum := sha256.Sum256(data)
+ copy(hash, sum[:])
+ }
+ return sym, size, nil
+ }
+ if size > 2e9 {
+ // ggloblsym takes an int32,
+ // and probably the rest of the toolchain
+ // can't handle such big symbols either.
+ // See golang.org/issue/9862.
+ return nil, 0, fmt.Errorf("file too large")
+ }
-func slicebytes(nam *Node, s string) {
- slicebytes_gen++
- symname := fmt.Sprintf(".gobytes.%d", slicebytes_gen)
+ // File is too big to read and keep in memory.
+ // Compute hash if needed for read-only content hashing or if the caller wants it.
+ var sum []byte
+ if readonly || len(hash) > 0 {
+ h := sha256.New()
+ n, err := io.Copy(h, f)
+ if err != nil {
+ return nil, 0, err
+ }
+ if n != size {
+ return nil, 0, fmt.Errorf("file changed between reads")
+ }
+ sum = h.Sum(nil)
+ copy(hash, sum)
+ }
+
+ var symdata *obj.LSym
+ if readonly {
+ symname := fmt.Sprintf(stringSymPattern, size, sum)
+ symdata = Ctxt.Lookup(stringSymPrefix + symname)
+ if !symdata.OnList() {
+ info := symdata.NewFileInfo()
+ info.Name = file
+ info.Size = size
+ ggloblsym(symdata, int32(size), obj.DUPOK|obj.RODATA|obj.LOCAL)
+ // Note: AttrContentAddressable cannot be set here,
+ // because the content-addressable-handling code
+ // does not know about file symbols.
+ }
+ } else {
+ // Emit a zero-length data symbol
+ // and then fix up length and content to use file.
+ symdata = slicedata(pos, "").Sym.Linksym()
+ symdata.Size = size
+ symdata.Type = objabi.SNOPTRDATA
+ info := symdata.NewFileInfo()
+ info.Name = file
+ info.Size = size
+ }
+
+ return symdata, size, nil
+}
+
+var slicedataGen int
+
+func slicedata(pos src.XPos, s string) *Node {
+ slicedataGen++
+ symname := fmt.Sprintf(".gobytes.%d", slicedataGen)
sym := localpkg.Lookup(symname)
symnode := newname(sym)
sym.Def = asTypesNode(symnode)
lsym := sym.Linksym()
- off := dsname(lsym, 0, s, nam.Pos, "slice")
+ off := dstringdata(lsym, 0, s, pos, "slice")
ggloblsym(lsym, int32(off), obj.NOPTR|obj.LOCAL)
+ return symnode
+}
+
+func slicebytes(nam *Node, s string) {
if nam.Op != ONAME {
Fatalf("slicebytes %v", nam)
}
- slicesym(nam, symnode, int64(len(s)))
+ slicesym(nam, slicedata(nam.Pos, s), int64(len(s)))
}
-func dsname(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
+func dstringdata(s *obj.LSym, off int, t string, pos src.XPos, what string) int {
// Objects that are too large will cause the data section to overflow right away,
// causing a cryptic error message by the linker. Check for oversize objects here
// and provide a useful error message instead.
diff --git a/src/cmd/compile/internal/gc/plive.go b/src/cmd/compile/internal/gc/plive.go
index b471accb658..a48173e0d65 100644
--- a/src/cmd/compile/internal/gc/plive.go
+++ b/src/cmd/compile/internal/gc/plive.go
@@ -24,16 +24,6 @@ import (
"strings"
)
-// go115ReduceLiveness disables register maps and only produces stack
-// maps at call sites.
-//
-// In Go 1.15, we changed debug call injection to use conservative
-// scanning instead of precise pointer maps, so these are no longer
-// necessary.
-//
-// Keep in sync with runtime/preempt.go:go115ReduceLiveness.
-const go115ReduceLiveness = true
-
// OpVarDef is an annotation for the liveness analysis, marking a place
// where a complete initialization (definition) of a variable begins.
// Since the liveness analysis can see initialization of single-word
@@ -96,15 +86,15 @@ type BlockEffects struct {
//
// uevar: upward exposed variables (used before set in block)
// varkill: killed variables (set in block)
- uevar varRegVec
- varkill varRegVec
+ uevar bvec
+ varkill bvec
// Computed during Liveness.solve using control flow information:
//
// livein: variables live at block entry
// liveout: variables live at block exit
- livein varRegVec
- liveout varRegVec
+ livein bvec
+ liveout bvec
}
// A collection of global state used by liveness analysis.
@@ -128,16 +118,14 @@ type Liveness struct {
// current Block during Liveness.epilogue. Indexed in Value
// order for that block. Additionally, for the entry block
// livevars[0] is the entry bitmap. Liveness.compact moves
- // these to stackMaps and regMaps.
- livevars []varRegVec
+ // these to stackMaps.
+ livevars []bvec
// livenessMap maps from safe points (i.e., CALLs) to their
// liveness map indexes.
livenessMap LivenessMap
stackMapSet bvecSet
stackMaps []bvec
- regMapSet map[liveRegMask]int
- regMaps []liveRegMask
cache progeffectscache
}
@@ -158,7 +146,7 @@ func (m *LivenessMap) reset() {
delete(m.vals, k)
}
}
- m.deferreturn = LivenessInvalid
+ m.deferreturn = LivenessDontCare
}
func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
@@ -166,27 +154,17 @@ func (m *LivenessMap) set(v *ssa.Value, i LivenessIndex) {
}
func (m LivenessMap) Get(v *ssa.Value) LivenessIndex {
- if !go115ReduceLiveness {
- // All safe-points are in the map, so if v isn't in
- // the map, it's an unsafe-point.
- if idx, ok := m.vals[v.ID]; ok {
- return idx
- }
- return LivenessInvalid
- }
-
// If v isn't in the map, then it's a "don't care" and not an
// unsafe-point.
if idx, ok := m.vals[v.ID]; ok {
return idx
}
- return LivenessIndex{StackMapDontCare, StackMapDontCare, false}
+ return LivenessIndex{StackMapDontCare, false}
}
// LivenessIndex stores the liveness map information for a Value.
type LivenessIndex struct {
stackMapIndex int
- regMapIndex int // only for !go115ReduceLiveness
// isUnsafePoint indicates that this is an unsafe-point.
//
@@ -197,8 +175,10 @@ type LivenessIndex struct {
isUnsafePoint bool
}
-// LivenessInvalid indicates an unsafe point with no stack map.
-var LivenessInvalid = LivenessIndex{StackMapDontCare, StackMapDontCare, true} // only for !go115ReduceLiveness
+// LivenessDontCare indicates that the liveness information doesn't
+// matter. Currently it is used in deferreturn liveness when we don't
+// actually need it. It should never be emitted to the PCDATA stream.
+var LivenessDontCare = LivenessIndex{StackMapDontCare, true}
// StackMapDontCare indicates that the stack map index at a Value
// doesn't matter.
@@ -212,46 +192,12 @@ func (idx LivenessIndex) StackMapValid() bool {
return idx.stackMapIndex != StackMapDontCare
}
-func (idx LivenessIndex) RegMapValid() bool {
- return idx.regMapIndex != StackMapDontCare
-}
-
type progeffectscache struct {
retuevar []int32
tailuevar []int32
initialized bool
}
-// varRegVec contains liveness bitmaps for variables and registers.
-type varRegVec struct {
- vars bvec
- regs liveRegMask
-}
-
-func (v *varRegVec) Eq(v2 varRegVec) bool {
- return v.vars.Eq(v2.vars) && v.regs == v2.regs
-}
-
-func (v *varRegVec) Copy(v2 varRegVec) {
- v.vars.Copy(v2.vars)
- v.regs = v2.regs
-}
-
-func (v *varRegVec) Clear() {
- v.vars.Clear()
- v.regs = 0
-}
-
-func (v *varRegVec) Or(v1, v2 varRegVec) {
- v.vars.Or(v1.vars, v2.vars)
- v.regs = v1.regs | v2.regs
-}
-
-func (v *varRegVec) AndNot(v1, v2 varRegVec) {
- v.vars.AndNot(v1.vars, v2.vars)
- v.regs = v1.regs &^ v2.regs
-}
-
// livenessShouldTrack reports whether the liveness analysis
// should track the variable n.
// We don't care about variables that have no pointers,
@@ -400,110 +346,6 @@ func affectedNode(v *ssa.Value) (*Node, ssa.SymEffect) {
}
}
-// regEffects returns the registers affected by v.
-func (lv *Liveness) regEffects(v *ssa.Value) (uevar, kill liveRegMask) {
- if go115ReduceLiveness {
- return 0, 0
- }
- if v.Op == ssa.OpPhi {
- // All phi node arguments must come from the same
- // register and the result must also go to that
- // register, so there's no overall effect.
- return 0, 0
- }
- addLocs := func(mask liveRegMask, v *ssa.Value, ptrOnly bool) liveRegMask {
- if int(v.ID) >= len(lv.f.RegAlloc) {
- // v has no allocated registers.
- return mask
- }
- loc := lv.f.RegAlloc[v.ID]
- if loc == nil {
- // v has no allocated registers.
- return mask
- }
- if v.Op == ssa.OpGetG {
- // GetG represents the G register, which is a
- // pointer, but not a valid GC register. The
- // current G is always reachable, so it's okay
- // to ignore this register.
- return mask
- }
-
- // Collect registers and types from v's location.
- var regs [2]*ssa.Register
- nreg := 0
- switch loc := loc.(type) {
- case ssa.LocalSlot:
- return mask
- case *ssa.Register:
- if ptrOnly && !v.Type.HasPointers() {
- return mask
- }
- regs[0] = loc
- nreg = 1
- case ssa.LocPair:
- // The value will have TTUPLE type, and the
- // children are nil or *ssa.Register.
- if v.Type.Etype != types.TTUPLE {
- v.Fatalf("location pair %s has non-tuple type %v", loc, v.Type)
- }
- for i, loc1 := range &loc {
- if loc1 == nil {
- continue
- }
- if ptrOnly && !v.Type.FieldType(i).HasPointers() {
- continue
- }
- regs[nreg] = loc1.(*ssa.Register)
- nreg++
- }
- default:
- v.Fatalf("weird RegAlloc location: %s (%T)", loc, loc)
- }
-
- // Add register locations to vars.
- for _, reg := range regs[:nreg] {
- if reg.GCNum() == -1 {
- if ptrOnly {
- v.Fatalf("pointer in non-pointer register %v", reg)
- } else {
- continue
- }
- }
- mask |= 1 << uint(reg.GCNum())
- }
- return mask
- }
-
- // v clobbers all registers it writes to (whether or not the
- // write is pointer-typed).
- kill = addLocs(0, v, false)
- for _, arg := range v.Args {
- // v uses all registers is reads from, but we only
- // care about marking those containing pointers.
- uevar = addLocs(uevar, arg, true)
- }
- return uevar, kill
-}
-
-type liveRegMask uint32 // only if !go115ReduceLiveness
-
-func (m liveRegMask) niceString(config *ssa.Config) string {
- if m == 0 {
- return ""
- }
- str := ""
- for i, reg := range config.GCRegMap {
- if m&(1<= f.NumBlocks() {
lv.be = lc.be[:f.NumBlocks()]
}
- lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessInvalid}
+ lv.livenessMap = LivenessMap{vals: lc.livenessMap.vals, deferreturn: LivenessDontCare}
lc.livenessMap.vals = nil
}
if lv.be == nil {
@@ -546,10 +386,10 @@ func newliveness(fn *Node, f *ssa.Func, vars []*Node, idx map[*Node]int32, stkpt
for _, b := range f.Blocks {
be := lv.blockEffects(b)
- be.uevar = varRegVec{vars: bulk.next()}
- be.varkill = varRegVec{vars: bulk.next()}
- be.livein = varRegVec{vars: bulk.next()}
- be.liveout = varRegVec{vars: bulk.next()}
+ be.uevar = bulk.next()
+ be.varkill = bulk.next()
+ be.livein = bulk.next()
+ be.liveout = bulk.next()
}
lv.livenessMap.reset()
@@ -637,20 +477,6 @@ func onebitwalktype1(t *types.Type, off int64, bv bvec) {
}
}
-// usedRegs returns the maximum width of the live register map.
-func (lv *Liveness) usedRegs() int32 {
- var any liveRegMask
- for _, live := range lv.regMaps {
- any |= live
- }
- i := int32(0)
- for any != 0 {
- any >>= 1
- i++
- }
- return i
-}
-
// Generates live pointer value maps for arguments and local variables. The
// this argument and the in arguments are always assumed live. The vars
// argument is a slice of *Nodes.
@@ -851,31 +677,16 @@ func (lv *Liveness) markUnsafePoints() {
// particular, call Values can have a stack map in case the callee
// grows the stack, but not themselves be a safe-point.
func (lv *Liveness) hasStackMap(v *ssa.Value) bool {
- // The runtime only has safe-points in function prologues, so
- // we only need stack maps at call sites. go:nosplit functions
- // are similar.
- if go115ReduceLiveness || compiling_runtime || lv.f.NoSplit {
- if !v.Op.IsCall() {
- return false
- }
- // typedmemclr and typedmemmove are write barriers and
- // deeply non-preemptible. They are unsafe points and
- // hence should not have liveness maps.
- if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) {
- return false
- }
- return true
- }
-
- switch v.Op {
- case ssa.OpInitMem, ssa.OpArg, ssa.OpSP, ssa.OpSB,
- ssa.OpSelect0, ssa.OpSelect1, ssa.OpGetG,
- ssa.OpVarDef, ssa.OpVarLive, ssa.OpKeepAlive,
- ssa.OpPhi:
- // These don't produce code (see genssa).
+ if !v.Op.IsCall() {
return false
}
- return !lv.unsafePoints.Get(int32(v.ID))
+ // typedmemclr and typedmemmove are write barriers and
+ // deeply non-preemptible. They are unsafe points and
+ // hence should not have liveness maps.
+ if sym, ok := v.Aux.(*ssa.AuxCall); ok && (sym.Fn == typedmemclr || sym.Fn == typedmemmove) {
+ return false
+ }
+ return true
}
// Initializes the sets for solving the live variables. Visits all the
@@ -891,17 +702,13 @@ func (lv *Liveness) prologue() {
// effects with the each prog effects.
for j := len(b.Values) - 1; j >= 0; j-- {
pos, e := lv.valueEffects(b.Values[j])
- regUevar, regKill := lv.regEffects(b.Values[j])
if e&varkill != 0 {
- be.varkill.vars.Set(pos)
- be.uevar.vars.Unset(pos)
+ be.varkill.Set(pos)
+ be.uevar.Unset(pos)
}
- be.varkill.regs |= regKill
- be.uevar.regs &^= regKill
if e&uevar != 0 {
- be.uevar.vars.Set(pos)
+ be.uevar.Set(pos)
}
- be.uevar.regs |= regUevar
}
}
}
@@ -911,8 +718,8 @@ func (lv *Liveness) solve() {
// These temporary bitvectors exist to avoid successive allocations and
// frees within the loop.
nvars := int32(len(lv.vars))
- newlivein := varRegVec{vars: bvalloc(nvars)}
- newliveout := varRegVec{vars: bvalloc(nvars)}
+ newlivein := bvalloc(nvars)
+ newliveout := bvalloc(nvars)
// Walk blocks in postorder ordering. This improves convergence.
po := lv.f.Postorder()
@@ -930,11 +737,11 @@ func (lv *Liveness) solve() {
switch b.Kind {
case ssa.BlockRet:
for _, pos := range lv.cache.retuevar {
- newliveout.vars.Set(pos)
+ newliveout.Set(pos)
}
case ssa.BlockRetJmp:
for _, pos := range lv.cache.tailuevar {
- newliveout.vars.Set(pos)
+ newliveout.Set(pos)
}
case ssa.BlockExit:
// panic exit - nothing to do
@@ -969,7 +776,7 @@ func (lv *Liveness) solve() {
// variables at each safe point locations.
func (lv *Liveness) epilogue() {
nvars := int32(len(lv.vars))
- liveout := varRegVec{vars: bvalloc(nvars)}
+ liveout := bvalloc(nvars)
livedefer := bvalloc(nvars) // always-live variables
// If there is a defer (that could recover), then all output
@@ -1025,12 +832,11 @@ func (lv *Liveness) epilogue() {
{
// Reserve an entry for function entry.
live := bvalloc(nvars)
- lv.livevars = append(lv.livevars, varRegVec{vars: live})
+ lv.livevars = append(lv.livevars, live)
}
for _, b := range lv.f.Blocks {
be := lv.blockEffects(b)
- firstBitmapIndex := len(lv.livevars)
// Walk forward through the basic block instructions and
// allocate liveness maps for those instructions that need them.
@@ -1040,7 +846,7 @@ func (lv *Liveness) epilogue() {
}
live := bvalloc(nvars)
- lv.livevars = append(lv.livevars, varRegVec{vars: live})
+ lv.livevars = append(lv.livevars, live)
}
// walk backward, construct maps at each safe point
@@ -1056,21 +862,18 @@ func (lv *Liveness) epilogue() {
live := &lv.livevars[index]
live.Or(*live, liveout)
- live.vars.Or(live.vars, livedefer) // only for non-entry safe points
+ live.Or(*live, livedefer) // only for non-entry safe points
index--
}
// Update liveness information.
pos, e := lv.valueEffects(v)
- regUevar, regKill := lv.regEffects(v)
if e&varkill != 0 {
- liveout.vars.Unset(pos)
+ liveout.Unset(pos)
}
- liveout.regs &^= regKill
if e&uevar != 0 {
- liveout.vars.Set(pos)
+ liveout.Set(pos)
}
- liveout.regs |= regUevar
}
if b == lv.f.Entry {
@@ -1080,7 +883,7 @@ func (lv *Liveness) epilogue() {
// Check to make sure only input variables are live.
for i, n := range lv.vars {
- if !liveout.vars.Get(int32(i)) {
+ if !liveout.Get(int32(i)) {
continue
}
if n.Class() == PPARAM {
@@ -1094,32 +897,16 @@ func (lv *Liveness) epilogue() {
live.Or(*live, liveout)
}
- // Check that no registers are live across calls.
- // For closure calls, the CALLclosure is the last use
- // of the context register, so it's dead after the call.
- index = int32(firstBitmapIndex)
- for _, v := range b.Values {
- if lv.hasStackMap(v) {
- live := lv.livevars[index]
- if v.Op.IsCall() && live.regs != 0 {
- lv.printDebug()
- v.Fatalf("%v register %s recorded as live at call", lv.fn.Func.Nname, live.regs.niceString(lv.f.Config))
- }
- index++
- }
- }
-
// The liveness maps for this block are now complete. Compact them.
lv.compact(b)
}
// If we have an open-coded deferreturn call, make a liveness map for it.
if lv.fn.Func.OpenCodedDeferDisallowed() {
- lv.livenessMap.deferreturn = LivenessInvalid
+ lv.livenessMap.deferreturn = LivenessDontCare
} else {
lv.livenessMap.deferreturn = LivenessIndex{
stackMapIndex: lv.stackMapSet.add(livedefer),
- regMapIndex: 0, // entry regMap, containing no live registers
isUnsafePoint: false,
}
}
@@ -1136,20 +923,10 @@ func (lv *Liveness) epilogue() {
lv.f.Fatalf("%v %L recorded as live on entry", lv.fn.Func.Nname, n)
}
}
- if !go115ReduceLiveness {
- // Check that no registers are live at function entry.
- // The context register, if any, comes from a
- // LoweredGetClosurePtr operation first thing in the function,
- // so it doesn't appear live at entry.
- if regs := lv.regMaps[0]; regs != 0 {
- lv.printDebug()
- lv.f.Fatalf("%v register %s recorded as live on entry", lv.fn.Func.Nname, regs.niceString(lv.f.Config))
- }
- }
}
// Compact coalesces identical bitmaps from lv.livevars into the sets
-// lv.stackMapSet and lv.regMaps.
+// lv.stackMapSet.
//
// Compact clears lv.livevars.
//
@@ -1165,45 +942,23 @@ func (lv *Liveness) epilogue() {
// PCDATA tables cost about 100k. So for now we keep using a single index for
// both bitmap lists.
func (lv *Liveness) compact(b *ssa.Block) {
- add := func(live varRegVec, isUnsafePoint bool) LivenessIndex { // only if !go115ReduceLiveness
- // Deduplicate the stack map.
- stackIndex := lv.stackMapSet.add(live.vars)
- // Deduplicate the register map.
- regIndex, ok := lv.regMapSet[live.regs]
- if !ok {
- regIndex = len(lv.regMapSet)
- lv.regMapSet[live.regs] = regIndex
- lv.regMaps = append(lv.regMaps, live.regs)
- }
- return LivenessIndex{stackIndex, regIndex, isUnsafePoint}
- }
pos := 0
if b == lv.f.Entry {
// Handle entry stack map.
- if !go115ReduceLiveness {
- add(lv.livevars[0], false)
- } else {
- lv.stackMapSet.add(lv.livevars[0].vars)
- }
+ lv.stackMapSet.add(lv.livevars[0])
pos++
}
for _, v := range b.Values {
- if go115ReduceLiveness {
- hasStackMap := lv.hasStackMap(v)
- isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
- idx := LivenessIndex{StackMapDontCare, StackMapDontCare, isUnsafePoint}
- if hasStackMap {
- idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos].vars)
- pos++
- }
- if hasStackMap || isUnsafePoint {
- lv.livenessMap.set(v, idx)
- }
- } else if lv.hasStackMap(v) {
- isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
- lv.livenessMap.set(v, add(lv.livevars[pos], isUnsafePoint))
+ hasStackMap := lv.hasStackMap(v)
+ isUnsafePoint := lv.allUnsafe || lv.unsafePoints.Get(int32(v.ID))
+ idx := LivenessIndex{StackMapDontCare, isUnsafePoint}
+ if hasStackMap {
+ idx.stackMapIndex = lv.stackMapSet.add(lv.livevars[pos])
pos++
}
+ if hasStackMap || isUnsafePoint {
+ lv.livenessMap.set(v, idx)
+ }
}
// Reset livevars.
@@ -1250,8 +1005,8 @@ func (lv *Liveness) showlive(v *ssa.Value, live bvec) {
Warnl(pos, s)
}
-func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool {
- if live.vars.IsEmpty() && live.regs == 0 {
+func (lv *Liveness) printbvec(printed bool, name string, live bvec) bool {
+ if live.IsEmpty() {
return printed
}
@@ -1264,19 +1019,18 @@ func (lv *Liveness) printbvec(printed bool, name string, live varRegVec) bool {
comma := ""
for i, n := range lv.vars {
- if !live.vars.Get(int32(i)) {
+ if !live.Get(int32(i)) {
continue
}
fmt.Printf("%s%s", comma, n.Sym.Name)
comma = ","
}
- fmt.Printf("%s%s", comma, live.regs.niceString(lv.f.Config))
return true
}
-// printeffect is like printbvec, but for valueEffects and regEffects.
-func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, regMask liveRegMask) bool {
- if !x && regMask == 0 {
+// printeffect is like printbvec, but for valueEffects.
+func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool) bool {
+ if !x {
return printed
}
if !printed {
@@ -1288,15 +1042,7 @@ func (lv *Liveness) printeffect(printed bool, name string, pos int32, x bool, re
if x {
fmt.Printf("%s", lv.vars[pos].Sym.Name)
}
- for j, reg := range lv.f.Config.GCRegMap {
- if regMask&(1<<uint(j)) != 0 {
- if regs.n > 32 {
- // Our uint32 conversion below won't work.
- Fatalf("GP registers overflow uint32")
- }
-
- if regs.n > 0 {
- for _, live := range lv.regMaps {
- regs.Clear()
- regs.b[0] = uint32(live)
- roff = dbvec(&regsSymTmp, roff, regs)
- }
- }
- }
-
// Give these LSyms content-addressable names,
// so that they can be de-duplicated.
// This provides significant binary size savings.
@@ -1502,11 +1219,7 @@ func (lv *Liveness) emit() (argsSym, liveSym, regsSym *obj.LSym) {
lsym.Set(obj.AttrContentAddressable, true)
})
}
- if !go115ReduceLiveness {
- return makeSym(&argsSymTmp), makeSym(&liveSymTmp), makeSym(&regsSymTmp)
- }
- // TODO(go115ReduceLiveness): Remove regsSym result
- return makeSym(&argsSymTmp), makeSym(&liveSymTmp), nil
+ return makeSym(&argsSymTmp), makeSym(&liveSymTmp)
}
// Entry pointer for liveness analysis. Solves for the liveness of
@@ -1553,7 +1266,7 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
// Emit the live pointer map data structures
ls := e.curfn.Func.lsym
fninfo := ls.Func()
- fninfo.GCArgs, fninfo.GCLocals, fninfo.GCRegs = lv.emit()
+ fninfo.GCArgs, fninfo.GCLocals = lv.emit()
p := pp.Prog(obj.AFUNCDATA)
Addrconst(&p.From, objabi.FUNCDATA_ArgsPointerMaps)
@@ -1567,14 +1280,6 @@ func liveness(e *ssafn, f *ssa.Func, pp *Progs) LivenessMap {
p.To.Name = obj.NAME_EXTERN
p.To.Sym = fninfo.GCLocals
- if !go115ReduceLiveness {
- p = pp.Prog(obj.AFUNCDATA)
- Addrconst(&p.From, objabi.FUNCDATA_RegPointerMaps)
- p.To.Type = obj.TYPE_MEM
- p.To.Name = obj.NAME_EXTERN
- p.To.Sym = fninfo.GCRegs
- }
-
return lv.livenessMap
}
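
With the register maps gone from plive.go, a safe point is identified by a deduplicated stack-map index alone. The snippet below is an illustrative stand-in for that dedup step, not the compiler's bvecSet: identical bitmaps share one index, which is all the reduced LivenessIndex has to carry besides the unsafe-point flag.

package main

import "fmt"

type bitmapSet struct {
	index map[string]int // bitmap contents -> assigned index
	maps  [][]byte       // unique bitmaps, in index order
}

func (s *bitmapSet) add(b []byte) int {
	if s.index == nil {
		s.index = make(map[string]int)
	}
	key := string(b)
	if i, ok := s.index[key]; ok {
		return i // identical bitmap already recorded; reuse its index
	}
	i := len(s.maps)
	s.index[key] = i
	s.maps = append(s.maps, append([]byte(nil), b...))
	return i
}

func main() {
	var s bitmapSet
	fmt.Println(s.add([]byte{0b1010})) // 0
	fmt.Println(s.add([]byte{0b0001})) // 1
	fmt.Println(s.add([]byte{0b1010})) // 0 again: deduplicated
}
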
diff --git a/src/cmd/compile/internal/gc/reflect.go b/src/cmd/compile/internal/gc/reflect.go
index 229fcfeaee4..21429af782e 100644
--- a/src/cmd/compile/internal/gc/reflect.go
+++ b/src/cmd/compile/internal/gc/reflect.go
@@ -1275,9 +1275,8 @@ func dtypesym(t *types.Type) *obj.LSym {
}
ot = dgopkgpath(lsym, ot, tpkg)
- xcount := sort.Search(n, func(i int) bool { return !types.IsExported(m[i].name.Name) })
ot = dsymptr(lsym, ot, lsym, ot+3*Widthptr+uncommonSize(t))
- ot = duintptr(lsym, ot, uint64(xcount))
+ ot = duintptr(lsym, ot, uint64(n))
ot = duintptr(lsym, ot, uint64(n))
dataAdd := imethodSize() * n
ot = dextratype(lsym, ot, t, dataAdd)
diff --git a/src/cmd/compile/internal/gc/scc.go b/src/cmd/compile/internal/gc/scc.go
index 60e0a9b8b51..5c7935aa876 100644
--- a/src/cmd/compile/internal/gc/scc.go
+++ b/src/cmd/compile/internal/gc/scc.go
@@ -75,8 +75,19 @@ func (v *bottomUpVisitor) visit(n *Node) uint32 {
inspectList(n.Nbody, func(n *Node) bool {
switch n.Op {
- case OCALLFUNC, OCALLMETH:
- fn := asNode(n.Left.Type.Nname())
+ case ONAME:
+ if n.Class() == PFUNC {
+ if n.isMethodExpression() {
+ n = asNode(n.Type.Nname())
+ }
+ if n != nil && n.Name.Defn != nil {
+ if m := v.visit(n.Name.Defn); m < min {
+ min = m
+ }
+ }
+ }
+ case ODOTMETH:
+ fn := asNode(n.Type.Nname())
if fn != nil && fn.Op == ONAME && fn.Class() == PFUNC && fn.Name.Defn != nil {
if m := v.visit(fn.Name.Defn); m < min {
min = m
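
The scc.go hunk above widens the bottom-up visitor from direct calls (OCALLFUNC/OCALLMETH) to any ONAME reference to a function, including method expressions. A hypothetical source-level example of what it now catches: use refers to helper only through a function value, yet the two are mutually dependent and belong in one analysis group.

package main

import "fmt"

func helper(n int) int {
	if n == 0 {
		return 0
	}
	return use(n - 1) // mutual dependency with use
}

func use(n int) int {
	f := helper // function referenced as a value (ONAME with class PFUNC), never called by name here
	return f(n)
}

func main() {
	fmt.Println(use(3)) // 0
}
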
diff --git a/src/cmd/compile/internal/gc/sinit.go b/src/cmd/compile/internal/gc/sinit.go
index 9c4dcd739cf..212fcc022db 100644
--- a/src/cmd/compile/internal/gc/sinit.go
+++ b/src/cmd/compile/internal/gc/sinit.go
@@ -375,11 +375,6 @@ func readonlystaticname(t *types.Type) *Node {
return n
}
-func isLiteral(n *Node) bool {
- // Treat nils as zeros rather than literals.
- return n.Op == OLITERAL && n.Val().Ctype() != CTNIL
-}
-
func (n *Node) isSimpleName() bool {
return n.Op == ONAME && n.Class() != PAUTOHEAP && n.Class() != PEXTERN
}
@@ -404,7 +399,7 @@ const (
func getdyn(n *Node, top bool) initGenType {
switch n.Op {
default:
- if isLiteral(n) {
+ if n.isGoConst() {
return initConst
}
return initDynamic
@@ -559,7 +554,7 @@ func fixedlit(ctxt initContext, kind initKind, n *Node, var_ *Node, init *Nodes)
continue
}
- islit := isLiteral(value)
+ islit := value.isGoConst()
if (kind == initKindStatic && !islit) || (kind == initKindDynamic && islit) {
continue
}
@@ -732,7 +727,7 @@ func slicelit(ctxt initContext, n *Node, var_ *Node, init *Nodes) {
continue
}
- if vstat != nil && isLiteral(value) { // already set by copy from static value
+ if vstat != nil && value.isGoConst() { // already set by copy from static value
continue
}
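
The sinit.go change swaps the local isLiteral predicate for isGoConst, so any value the front end already knows to be a Go constant is eligible for static initialization. A rough source-level illustration (whether a particular literal is in fact emitted as static data remains the compiler's decision):

package main

import "fmt"

const k = 40

func rt() int { return 2 }

var (
	a = [3]int{1, 2, k + 2} // every element is a Go constant: eligible for static data
	b = [3]int{1, 2, rt()}  // rt() is not a constant: needs a runtime initializer
)

func main() { fmt.Println(a, b) }
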
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 65beb849117..67484904a95 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -72,9 +72,9 @@ func initssaconfig() {
deferproc = sysfunc("deferproc")
deferprocStack = sysfunc("deferprocStack")
Deferreturn = sysfunc("deferreturn")
- Duffcopy = sysvar("duffcopy") // asm func with special ABI
- Duffzero = sysvar("duffzero") // asm func with special ABI
- gcWriteBarrier = sysvar("gcWriteBarrier") // asm func with special ABI
+ Duffcopy = sysfunc("duffcopy")
+ Duffzero = sysfunc("duffzero")
+ gcWriteBarrier = sysfunc("gcWriteBarrier")
goschedguarded = sysfunc("goschedguarded")
growslice = sysfunc("growslice")
msanread = sysfunc("msanread")
@@ -105,51 +105,51 @@ func initssaconfig() {
// asm funcs with special ABI
if thearch.LinkArch.Name == "amd64" {
GCWriteBarrierReg = map[int16]*obj.LSym{
- x86.REG_AX: sysvar("gcWriteBarrier"),
- x86.REG_CX: sysvar("gcWriteBarrierCX"),
- x86.REG_DX: sysvar("gcWriteBarrierDX"),
- x86.REG_BX: sysvar("gcWriteBarrierBX"),
- x86.REG_BP: sysvar("gcWriteBarrierBP"),
- x86.REG_SI: sysvar("gcWriteBarrierSI"),
- x86.REG_R8: sysvar("gcWriteBarrierR8"),
- x86.REG_R9: sysvar("gcWriteBarrierR9"),
+ x86.REG_AX: sysfunc("gcWriteBarrier"),
+ x86.REG_CX: sysfunc("gcWriteBarrierCX"),
+ x86.REG_DX: sysfunc("gcWriteBarrierDX"),
+ x86.REG_BX: sysfunc("gcWriteBarrierBX"),
+ x86.REG_BP: sysfunc("gcWriteBarrierBP"),
+ x86.REG_SI: sysfunc("gcWriteBarrierSI"),
+ x86.REG_R8: sysfunc("gcWriteBarrierR8"),
+ x86.REG_R9: sysfunc("gcWriteBarrierR9"),
}
}
if thearch.LinkArch.Family == sys.Wasm {
- BoundsCheckFunc[ssa.BoundsIndex] = sysvar("goPanicIndex")
- BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("goPanicIndexU")
- BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("goPanicSliceAlen")
- BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("goPanicSliceAlenU")
- BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("goPanicSliceAcap")
- BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("goPanicSliceAcapU")
- BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("goPanicSliceB")
- BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("goPanicSliceBU")
- BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("goPanicSlice3Alen")
- BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("goPanicSlice3AlenU")
- BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("goPanicSlice3Acap")
- BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("goPanicSlice3AcapU")
- BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("goPanicSlice3B")
- BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("goPanicSlice3BU")
- BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("goPanicSlice3C")
- BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("goPanicSlice3CU")
+ BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("goPanicIndex")
+ BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("goPanicIndexU")
+ BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("goPanicSliceAlen")
+ BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("goPanicSliceAlenU")
+ BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("goPanicSliceAcap")
+ BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("goPanicSliceAcapU")
+ BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("goPanicSliceB")
+ BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("goPanicSliceBU")
+ BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("goPanicSlice3Alen")
+ BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("goPanicSlice3AlenU")
+ BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("goPanicSlice3Acap")
+ BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("goPanicSlice3AcapU")
+ BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("goPanicSlice3B")
+ BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("goPanicSlice3BU")
+ BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("goPanicSlice3C")
+ BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("goPanicSlice3CU")
} else {
- BoundsCheckFunc[ssa.BoundsIndex] = sysvar("panicIndex")
- BoundsCheckFunc[ssa.BoundsIndexU] = sysvar("panicIndexU")
- BoundsCheckFunc[ssa.BoundsSliceAlen] = sysvar("panicSliceAlen")
- BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysvar("panicSliceAlenU")
- BoundsCheckFunc[ssa.BoundsSliceAcap] = sysvar("panicSliceAcap")
- BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysvar("panicSliceAcapU")
- BoundsCheckFunc[ssa.BoundsSliceB] = sysvar("panicSliceB")
- BoundsCheckFunc[ssa.BoundsSliceBU] = sysvar("panicSliceBU")
- BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysvar("panicSlice3Alen")
- BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysvar("panicSlice3AlenU")
- BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysvar("panicSlice3Acap")
- BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysvar("panicSlice3AcapU")
- BoundsCheckFunc[ssa.BoundsSlice3B] = sysvar("panicSlice3B")
- BoundsCheckFunc[ssa.BoundsSlice3BU] = sysvar("panicSlice3BU")
- BoundsCheckFunc[ssa.BoundsSlice3C] = sysvar("panicSlice3C")
- BoundsCheckFunc[ssa.BoundsSlice3CU] = sysvar("panicSlice3CU")
+ BoundsCheckFunc[ssa.BoundsIndex] = sysfunc("panicIndex")
+ BoundsCheckFunc[ssa.BoundsIndexU] = sysfunc("panicIndexU")
+ BoundsCheckFunc[ssa.BoundsSliceAlen] = sysfunc("panicSliceAlen")
+ BoundsCheckFunc[ssa.BoundsSliceAlenU] = sysfunc("panicSliceAlenU")
+ BoundsCheckFunc[ssa.BoundsSliceAcap] = sysfunc("panicSliceAcap")
+ BoundsCheckFunc[ssa.BoundsSliceAcapU] = sysfunc("panicSliceAcapU")
+ BoundsCheckFunc[ssa.BoundsSliceB] = sysfunc("panicSliceB")
+ BoundsCheckFunc[ssa.BoundsSliceBU] = sysfunc("panicSliceBU")
+ BoundsCheckFunc[ssa.BoundsSlice3Alen] = sysfunc("panicSlice3Alen")
+ BoundsCheckFunc[ssa.BoundsSlice3AlenU] = sysfunc("panicSlice3AlenU")
+ BoundsCheckFunc[ssa.BoundsSlice3Acap] = sysfunc("panicSlice3Acap")
+ BoundsCheckFunc[ssa.BoundsSlice3AcapU] = sysfunc("panicSlice3AcapU")
+ BoundsCheckFunc[ssa.BoundsSlice3B] = sysfunc("panicSlice3B")
+ BoundsCheckFunc[ssa.BoundsSlice3BU] = sysfunc("panicSlice3BU")
+ BoundsCheckFunc[ssa.BoundsSlice3C] = sysfunc("panicSlice3C")
+ BoundsCheckFunc[ssa.BoundsSlice3CU] = sysfunc("panicSlice3CU")
}
if thearch.LinkArch.PtrSize == 4 {
ExtendCheckFunc[ssa.BoundsIndex] = sysvar("panicExtendIndex")
@@ -409,11 +409,17 @@ func buildssa(fn *Node, worker int) *ssa.Func {
// Generate addresses of local declarations
s.decladdrs = map[*Node]*ssa.Value{}
+ var args []ssa.Param
+ var results []ssa.Param
for _, n := range fn.Func.Dcl {
switch n.Class() {
- case PPARAM, PPARAMOUT:
+ case PPARAM:
s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
- if n.Class() == PPARAMOUT && s.canSSA(n) {
+ args = append(args, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+ case PPARAMOUT:
+ s.decladdrs[n] = s.entryNewValue2A(ssa.OpLocalAddr, types.NewPtr(n.Type), n, s.sp, s.startmem)
+ results = append(results, ssa.Param{Type: n.Type, Offset: int32(n.Xoffset)})
+ if s.canSSA(n) {
// Save ssa-able PPARAMOUT variables so we can
// store them back to the stack at the end of
// the function.
@@ -2472,6 +2478,11 @@ func (s *state) expr(n *Node) *ssa.Value {
a := s.expr(n.Left)
b := s.expr(n.Right)
return s.newValue2(s.ssaOp(n.Op, n.Type), a.Type, a, b)
+ case OANDNOT:
+ a := s.expr(n.Left)
+ b := s.expr(n.Right)
+ b = s.newValue1(s.ssaOp(OBITNOT, b.Type), b.Type, b)
+ return s.newValue2(s.ssaOp(OAND, n.Type), a.Type, a, b)
case OLSH, ORSH:
a := s.expr(n.Left)
b := s.expr(n.Right)
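
The new OANDNOT case lowers x &^ y directly in the SSA builder as x & ^y (the corresponding walk.go rewrite is deleted later in this diff). A minimal check of the identity the lowering relies on:

package main

import "fmt"

func main() {
	x, y := uint8(0b1011_0110), uint8(0b0000_1111)
	andNot := x &^ y
	lowered := x & ^y // what the SSA builder now emits: AND with the bitwise complement
	fmt.Printf("%08b %08b\n", andNot, lowered) // identical: 10110000 10110000
	fmt.Println(andNot == lowered)             // true
}
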
@@ -3541,12 +3552,24 @@ func init() {
return nil
},
sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ addF("runtime/internal/atomic", "And",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
+ return nil
+ },
+ sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
addF("runtime/internal/atomic", "Or8",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
return nil
},
sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+ addF("runtime/internal/atomic", "Or",
+ func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+ s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
+ return nil
+ },
+ sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
@@ -3557,13 +3580,17 @@ func init() {
alias("runtime/internal/atomic", "LoadAcq", "runtime/internal/atomic", "Load", lwatomics...)
alias("runtime/internal/atomic", "LoadAcq64", "runtime/internal/atomic", "Load64", lwatomics...)
alias("runtime/internal/atomic", "LoadAcquintptr", "runtime/internal/atomic", "LoadAcq", p4...)
+ alias("sync", "runtime_LoadAcquintptr", "runtime/internal/atomic", "LoadAcq", p4...) // linknamed
alias("runtime/internal/atomic", "LoadAcquintptr", "runtime/internal/atomic", "LoadAcq64", p8...)
+ alias("sync", "runtime_LoadAcquintptr", "runtime/internal/atomic", "LoadAcq64", p8...) // linknamed
alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store", p4...)
alias("runtime/internal/atomic", "Storeuintptr", "runtime/internal/atomic", "Store64", p8...)
alias("runtime/internal/atomic", "StoreRel", "runtime/internal/atomic", "Store", lwatomics...)
alias("runtime/internal/atomic", "StoreRel64", "runtime/internal/atomic", "Store64", lwatomics...)
alias("runtime/internal/atomic", "StoreReluintptr", "runtime/internal/atomic", "StoreRel", p4...)
+ alias("sync", "runtime_StoreReluintptr", "runtime/internal/atomic", "StoreRel", p4...) // linknamed
alias("runtime/internal/atomic", "StoreReluintptr", "runtime/internal/atomic", "StoreRel64", p8...)
+ alias("sync", "runtime_StoreReluintptr", "runtime/internal/atomic", "StoreRel64", p8...) // linknamed
alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg", p4...)
alias("runtime/internal/atomic", "Xchguintptr", "runtime/internal/atomic", "Xchg64", p8...)
alias("runtime/internal/atomic", "Xadduintptr", "runtime/internal/atomic", "Xadd", p4...)
@@ -4728,7 +4755,7 @@ func (s *state) getClosureAndRcvr(fn *Node) (*ssa.Value, *ssa.Value) {
s.nilCheck(itab)
itabidx := fn.Xoffset + 2*int64(Widthptr) + 8 // offset of fun field in runtime.itab
closure := s.newValue1I(ssa.OpOffPtr, s.f.Config.Types.UintptrPtr, itabidx, itab)
- rcvr := s.newValue1(ssa.OpIData, types.Types[TUINTPTR], i)
+ rcvr := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, i)
return closure, rcvr
}
@@ -4888,7 +4915,7 @@ func (s *state) canSSA(n *Node) bool {
if n.Class() == PPARAM && n.Sym != nil && n.Sym.Name == ".this" {
// wrappers generated by genwrapper need to update
// the .this pointer in place.
- // TODO: treat as a PPARMOUT?
+ // TODO: treat as a PPARAMOUT?
return false
}
return canSSAType(n.Type)
@@ -5201,7 +5228,10 @@ func (s *state) storeTypeScalars(t *types.Type, left, right *ssa.Value, skip ski
case t.IsBoolean() || t.IsInteger() || t.IsFloat() || t.IsComplex():
s.store(t, left, right)
case t.IsPtrShaped():
- // no scalar fields.
+ if t.IsPtr() && t.Elem().NotInHeap() {
+ s.store(t, left, right) // see issue 42032
+ }
+ // otherwise, no scalar fields.
case t.IsString():
if skip&skipLen != 0 {
return
@@ -5245,6 +5275,9 @@ func (s *state) storeTypeScalars(t *types.Type, left, right *ssa.Value, skip ski
func (s *state) storeTypePtrs(t *types.Type, left, right *ssa.Value) {
switch {
case t.IsPtrShaped():
+ if t.IsPtr() && t.Elem().NotInHeap() {
+ break // see issue 42032
+ }
s.store(t, left, right)
case t.IsString():
ptr := s.newValue1(ssa.OpStringPtr, s.f.Config.Types.BytePtr, right)
@@ -6232,7 +6265,7 @@ func genssa(f *ssa.Func, pp *Progs) {
// instruction. We won't use the actual liveness map on a
// control instruction. Just mark it something that is
// preemptible, unless this function is "all unsafe".
- s.pp.nextLive = LivenessIndex{-1, -1, allUnsafe(f)}
+ s.pp.nextLive = LivenessIndex{-1, allUnsafe(f)}
// Emit values in block
thearch.SSAMarkMoves(&s, b)
@@ -6892,56 +6925,38 @@ func (e *ssafn) Auto(pos src.XPos, t *types.Type) ssa.GCNode {
}
func (e *ssafn) SplitString(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
ptrType := types.NewPtr(types.Types[TUINT8])
lenType := types.Types[TINT]
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this string up into two separate variables.
- p := e.splitSlot(&name, ".ptr", 0, ptrType)
- l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
- return p, l
- }
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off}, ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)}
+ // Split this string up into two separate variables.
+ p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+ l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+ return p, l
}
func (e *ssafn) SplitInterface(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
n := name.N.(*Node)
u := types.Types[TUINTPTR]
t := types.NewPtr(types.Types[TUINT8])
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this interface up into two separate variables.
- f := ".itab"
- if n.Type.IsEmptyInterface() {
- f = ".type"
- }
- c := e.splitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
- d := e.splitSlot(&name, ".data", u.Size(), t)
- return c, d
+ // Split this interface up into two separate variables.
+ f := ".itab"
+ if n.Type.IsEmptyInterface() {
+ f = ".type"
}
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: u, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + int64(Widthptr)}
+ c := e.SplitSlot(&name, f, 0, u) // see comment in plive.go:onebitwalktype1.
+ d := e.SplitSlot(&name, ".data", u.Size(), t)
+ return c, d
}
func (e *ssafn) SplitSlice(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
ptrType := types.NewPtr(name.Type.Elem())
lenType := types.Types[TINT]
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this slice up into three separate variables.
- p := e.splitSlot(&name, ".ptr", 0, ptrType)
- l := e.splitSlot(&name, ".len", ptrType.Size(), lenType)
- c := e.splitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
- return p, l, c
- }
- // Return the three parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: ptrType, Off: name.Off},
- ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(Widthptr)},
- ssa.LocalSlot{N: n, Type: lenType, Off: name.Off + int64(2*Widthptr)}
+ p := e.SplitSlot(&name, ".ptr", 0, ptrType)
+ l := e.SplitSlot(&name, ".len", ptrType.Size(), lenType)
+ c := e.SplitSlot(&name, ".cap", ptrType.Size()+lenType.Size(), lenType)
+ return p, l, c
}
func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
s := name.Type.Size() / 2
var t *types.Type
if s == 8 {
@@ -6949,53 +6964,30 @@ func (e *ssafn) SplitComplex(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot)
} else {
t = types.Types[TFLOAT32]
}
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this complex up into two separate variables.
- r := e.splitSlot(&name, ".real", 0, t)
- i := e.splitSlot(&name, ".imag", t.Size(), t)
- return r, i
- }
- // Return the two parts of the larger variable.
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: t, Off: name.Off + s}
+ r := e.SplitSlot(&name, ".real", 0, t)
+ i := e.SplitSlot(&name, ".imag", t.Size(), t)
+ return r, i
}
func (e *ssafn) SplitInt64(name ssa.LocalSlot) (ssa.LocalSlot, ssa.LocalSlot) {
- n := name.N.(*Node)
var t *types.Type
if name.Type.IsSigned() {
t = types.Types[TINT32]
} else {
t = types.Types[TUINT32]
}
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Split this int64 up into two separate variables.
- if thearch.LinkArch.ByteOrder == binary.BigEndian {
- return e.splitSlot(&name, ".hi", 0, t), e.splitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
- }
- return e.splitSlot(&name, ".hi", t.Size(), t), e.splitSlot(&name, ".lo", 0, types.Types[TUINT32])
- }
- // Return the two parts of the larger variable.
if thearch.LinkArch.ByteOrder == binary.BigEndian {
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off + 4}
+ return e.SplitSlot(&name, ".hi", 0, t), e.SplitSlot(&name, ".lo", t.Size(), types.Types[TUINT32])
}
- return ssa.LocalSlot{N: n, Type: t, Off: name.Off + 4}, ssa.LocalSlot{N: n, Type: types.Types[TUINT32], Off: name.Off}
+ return e.SplitSlot(&name, ".hi", t.Size(), t), e.SplitSlot(&name, ".lo", 0, types.Types[TUINT32])
}
func (e *ssafn) SplitStruct(name ssa.LocalSlot, i int) ssa.LocalSlot {
- n := name.N.(*Node)
st := name.Type
- ft := st.FieldType(i)
- var offset int64
- for f := 0; f < i; f++ {
- offset += st.FieldType(f).Size()
- }
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- // Note: the _ field may appear several times. But
- // have no fear, identically-named but distinct Autos are
- // ok, albeit maybe confusing for a debugger.
- return e.splitSlot(&name, "."+st.FieldName(i), offset, ft)
- }
- return ssa.LocalSlot{N: n, Type: ft, Off: name.Off + st.FieldOff(i)}
+ // Note: the _ field may appear several times. But
+ // have no fear, identically-named but distinct Autos are
+ // ok, albeit maybe confusing for a debugger.
+ return e.SplitSlot(&name, "."+st.FieldName(i), st.FieldOff(i), st.FieldType(i))
}
func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
@@ -7005,19 +6997,23 @@ func (e *ssafn) SplitArray(name ssa.LocalSlot) ssa.LocalSlot {
e.Fatalf(n.Pos, "bad array size")
}
et := at.Elem()
- if n.Class() == PAUTO && !n.Name.Addrtaken() {
- return e.splitSlot(&name, "[0]", 0, et)
- }
- return ssa.LocalSlot{N: n, Type: et, Off: name.Off}
+ return e.SplitSlot(&name, "[0]", 0, et)
}
func (e *ssafn) DerefItab(it *obj.LSym, offset int64) *obj.LSym {
return itabsym(it, offset)
}
-// splitSlot returns a slot representing the data of parent starting at offset.
-func (e *ssafn) splitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
- s := &types.Sym{Name: parent.N.(*Node).Sym.Name + suffix, Pkg: localpkg}
+// SplitSlot returns a slot representing the data of parent starting at offset.
+func (e *ssafn) SplitSlot(parent *ssa.LocalSlot, suffix string, offset int64, t *types.Type) ssa.LocalSlot {
+ node := parent.N.(*Node)
+
+ if node.Class() != PAUTO || node.Name.Addrtaken() {
+ // addressed things and non-autos retain their parents (i.e., cannot truly be split)
+ return ssa.LocalSlot{N: node, Type: t, Off: parent.Off + offset}
+ }
+
+ s := &types.Sym{Name: node.Sym.Name + suffix, Pkg: localpkg}
n := &Node{
Name: new(Name),
diff --git a/src/cmd/compile/internal/gc/subr.go b/src/cmd/compile/internal/gc/subr.go
index eccd6f8a74a..defefd76b34 100644
--- a/src/cmd/compile/internal/gc/subr.go
+++ b/src/cmd/compile/internal/gc/subr.go
@@ -1854,8 +1854,10 @@ func isdirectiface(t *types.Type) bool {
}
switch t.Etype {
- case TPTR,
- TCHAN,
+ case TPTR:
+ // Pointers to notinheap types must be stored indirectly. See issue 42076.
+ return !t.Elem().NotInHeap()
+ case TCHAN,
TMAP,
TFUNC,
TUNSAFEPTR:
diff --git a/src/cmd/compile/internal/gc/syntax.go b/src/cmd/compile/internal/gc/syntax.go
index e3b49639772..649f7f4157b 100644
--- a/src/cmd/compile/internal/gc/syntax.go
+++ b/src/cmd/compile/internal/gc/syntax.go
@@ -142,7 +142,7 @@ const (
_, _ // second nodeInitorder bit
_, nodeHasBreak
_, nodeNoInline // used internally by inliner to indicate that a function call should not be inlined; set for OCALLFUNC and OCALLMETH only
- _, nodeImplicit // implicit OADDR or ODEREF; ++/-- statement represented as OASOP; or ANDNOT lowered to OAND
+ _, nodeImplicit // implicit OADDR or ODEREF; ++/-- statement represented as OASOP
_, nodeIsDDD // is the argument variadic
_, nodeDiag // already printed error about this
_, nodeColas // OAS resulting from :=
@@ -480,11 +480,87 @@ type Param struct {
Innermost *Node
Outer *Node
- // OTYPE
- //
- // TODO: Should Func pragmas also be stored on the Name?
- Pragma PragmaFlag
- Alias bool // node is alias for Ntype (only used when type-checking ODCLTYPE)
+ // OTYPE & ONAME //go:embed info,
+ // sharing storage to reduce gc.Param size.
+ // Extra is nil, or else *Extra is a *paramType or an *embedFileList.
+ Extra *interface{}
+}
+
+type paramType struct {
+ flag PragmaFlag
+ alias bool
+}
+
+type embedFileList []string
+
+// Pragma returns the PragmaFlag for p, which must be for an OTYPE.
+func (p *Param) Pragma() PragmaFlag {
+ if p.Extra == nil {
+ return 0
+ }
+ return (*p.Extra).(*paramType).flag
+}
+
+// SetPragma sets the PragmaFlag for p, which must be for an OTYPE.
+func (p *Param) SetPragma(flag PragmaFlag) {
+ if p.Extra == nil {
+ if flag == 0 {
+ return
+ }
+ p.Extra = new(interface{})
+ *p.Extra = &paramType{flag: flag}
+ return
+ }
+ (*p.Extra).(*paramType).flag = flag
+}
+
+// Alias reports whether p, which must be for an OTYPE, is a type alias.
+func (p *Param) Alias() bool {
+ if p.Extra == nil {
+ return false
+ }
+ t, ok := (*p.Extra).(*paramType)
+ if !ok {
+ return false
+ }
+ return t.alias
+}
+
+// SetAlias sets whether p, which must be for an OTYPE, is a type alias.
+func (p *Param) SetAlias(alias bool) {
+ if p.Extra == nil {
+ if !alias {
+ return
+ }
+ p.Extra = new(interface{})
+ *p.Extra = &paramType{alias: alias}
+ return
+ }
+ (*p.Extra).(*paramType).alias = alias
+}
+
+// EmbedFiles returns the list of embedded files for p,
+// which must be for an ONAME var.
+func (p *Param) EmbedFiles() []string {
+ if p.Extra == nil {
+ return nil
+ }
+ return *(*p.Extra).(*embedFileList)
+}
+
+// SetEmbedFiles sets the list of embedded files for p,
+// which must be for an ONAME var.
+func (p *Param) SetEmbedFiles(list []string) {
+ if p.Extra == nil {
+ if len(list) == 0 {
+ return
+ }
+ f := embedFileList(list)
+ p.Extra = new(interface{})
+ *p.Extra = &f
+ return
+ }
+ *(*p.Extra).(*embedFileList) = list
}
// Functions
@@ -555,7 +631,7 @@ type Func struct {
Ntype *Node // signature
Top int // top context (ctxCallee, etc)
Closure *Node // OCLOSURE <-> ODCLFUNC
- Nname *Node
+ Nname *Node // The ONAME node associated with an ODCLFUNC (both have same Type)
lsym *obj.LSym
Inl *Inline
@@ -697,7 +773,7 @@ const (
OCALLPART // Left.Right (method expression x.Method, not called)
OCAP // cap(Left)
OCLOSE // close(Left)
- OCLOSURE // func Type { Body } (func literal)
+ OCLOSURE // func Type { Func.Closure.Nbody } (func literal)
OCOMPLIT // Right{List} (composite literal, not yet lowered to specific form)
OMAPLIT // Type{List} (composite literal, Type is map)
OSTRUCTLIT // Type{List} (composite literal, Type is struct)
@@ -787,9 +863,14 @@ const (
OSIZEOF // unsafe.Sizeof(Left)
// statements
- OBLOCK // { List } (block of code)
- OBREAK // break [Sym]
- OCASE // case List: Nbody (List==nil means default)
+ OBLOCK // { List } (block of code)
+ OBREAK // break [Sym]
+ // OCASE: case List: Nbody (List==nil means default)
+ // For OTYPESW, List is an OTYPE node for the specified type (or OLITERAL
+ // for nil), and, if a type-switch variable is specified, Rlist is an
+ // ONAME for the version of the type-switch variable with the specified
+ // type.
+ OCASE
OCONTINUE // continue [Sym]
ODEFER // defer Left (Left must be call)
OEMPTY // no-op (empty statement)
@@ -813,15 +894,19 @@ const (
ORETURN // return List
OSELECT // select { List } (List is list of OCASE)
OSWITCH // switch Ninit; Left { List } (List is a list of OCASE)
- OTYPESW // Left = Right.(type) (appears as .Left of OSWITCH)
+ // OTYPESW: Left := Right.(type) (appears as .Left of OSWITCH)
+ // Left is nil if there is no type-switch variable
+ OTYPESW
// types
OTCHAN // chan int
OTMAP // map[string]int
OTSTRUCT // struct{}
OTINTER // interface{}
- OTFUNC // func()
- OTARRAY // []int, [8]int, [N]int or [...]int
+ // OTFUNC: func() - Left is receiver field, List is list of param fields, Rlist is
+ // list of result fields.
+ OTFUNC
+ OTARRAY // []int, [8]int, [N]int or [...]int
// misc
ODDD // func f(args ...int) or f(l...) or var a = [...]int{0, 1, 2}.
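
The Param changes in syntax.go replace two always-present fields (Pragma, Alias) with one lazily allocated Extra pointer that can hold either the OTYPE payload or an ONAME //go:embed file list. The same storage-sharing pattern, reduced to a standalone sketch with made-up names (typeInfo and fileList are illustrative, not the compiler's types):

package main

import "fmt"

type typeInfo struct{ alias bool }
type fileList []string

type param struct {
	// extra is nil, or *extra holds a *typeInfo or a *fileList.
	// The two payloads belong to different node kinds, so one slot suffices.
	extra *interface{}
}

func (p *param) alias() bool {
	if p.extra == nil {
		return false
	}
	t, ok := (*p.extra).(*typeInfo)
	return ok && t.alias
}

func (p *param) setAlias(alias bool) {
	if p.extra == nil {
		if !alias {
			return // common case: nothing to allocate
		}
		p.extra = new(interface{})
		*p.extra = &typeInfo{alias: alias}
		return
	}
	(*p.extra).(*typeInfo).alias = alias
}

func (p *param) setFiles(list []string) {
	f := fileList(list)
	if p.extra == nil {
		p.extra = new(interface{})
	}
	*p.extra = &f
}

func (p *param) files() []string {
	if p.extra == nil {
		return nil
	}
	l, ok := (*p.extra).(*fileList)
	if !ok {
		return nil
	}
	return *l
}

func main() {
	var p param
	fmt.Println(p.alias()) // false, nothing allocated yet
	p.setAlias(true)
	fmt.Println(p.alias()) // true

	var q param
	q.setFiles([]string{"hello.txt"})
	fmt.Println(q.files()) // [hello.txt]
}
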
diff --git a/src/cmd/compile/internal/gc/typecheck.go b/src/cmd/compile/internal/gc/typecheck.go
index a4b462da1d4..8ebeaf1330c 100644
--- a/src/cmd/compile/internal/gc/typecheck.go
+++ b/src/cmd/compile/internal/gc/typecheck.go
@@ -257,12 +257,12 @@ func typecheck(n *Node, top int) (res *Node) {
// are substituted.
cycle := cycleFor(n)
for _, n1 := range cycle {
- if n1.Name != nil && !n1.Name.Param.Alias {
+ if n1.Name != nil && !n1.Name.Param.Alias() {
// Cycle is ok. But if n is an alias type and doesn't
// have a type yet, we have a recursive type declaration
// with aliases that we can't handle properly yet.
// Report an error rather than crashing later.
- if n.Name != nil && n.Name.Param.Alias && n.Type == nil {
+ if n.Name != nil && n.Name.Param.Alias() && n.Type == nil {
lineno = n.Pos
Fatalf("cannot handle alias type declaration (issue #25838): %v", n)
}
@@ -2516,7 +2516,7 @@ func lookdot(n *Node, t *types.Type, dostrcmp int) *types.Field {
n.Left = nod(OADDR, n.Left, nil)
n.Left.SetImplicit(true)
n.Left = typecheck(n.Left, ctxType|ctxExpr)
- } else if tt.IsPtr() && !rcvr.IsPtr() && types.Identical(tt.Elem(), rcvr) {
+ } else if tt.IsPtr() && (!rcvr.IsPtr() || rcvr.IsPtr() && rcvr.Elem().NotInHeap()) && types.Identical(tt.Elem(), rcvr) {
n.Left = nod(ODEREF, n.Left, nil)
n.Left.SetImplicit(true)
n.Left = typecheck(n.Left, ctxType|ctxExpr)
@@ -3504,7 +3504,7 @@ func setUnderlying(t, underlying *types.Type) {
}
// Propagate go:notinheap pragma from the Name to the Type.
- if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma&NotInHeap != 0 {
+ if n.Name != nil && n.Name.Param != nil && n.Name.Param.Pragma()&NotInHeap != 0 {
t.SetNotInHeap(true)
}
@@ -3676,7 +3676,7 @@ func typecheckdef(n *Node) {
n.Name.Defn = typecheck(n.Name.Defn, ctxStmt) // fills in n.Type
case OTYPE:
- if p := n.Name.Param; p.Alias {
+ if p := n.Name.Param; p.Alias() {
// Type alias declaration: Simply use the rhs type - no need
// to create a new type.
// If we have a syntax error, p.Ntype may be nil.
diff --git a/src/cmd/compile/internal/gc/walk.go b/src/cmd/compile/internal/gc/walk.go
index 6ce3eda44b9..82898c81672 100644
--- a/src/cmd/compile/internal/gc/walk.go
+++ b/src/cmd/compile/internal/gc/walk.go
@@ -474,7 +474,7 @@ opswitch:
ODEREF, OSPTR, OITAB, OIDATA, OADDR:
n.Left = walkexpr(n.Left, init)
- case OEFACE, OAND, OSUB, OMUL, OADD, OOR, OXOR, OLSH, ORSH:
+ case OEFACE, OAND, OANDNOT, OSUB, OMUL, OADD, OOR, OXOR, OLSH, ORSH:
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
@@ -965,14 +965,6 @@ opswitch:
fn := basicnames[param] + "to" + basicnames[result]
n = conv(mkcall(fn, types.Types[result], init, conv(n.Left, types.Types[param])), n.Type)
- case OANDNOT:
- n.Left = walkexpr(n.Left, init)
- n.Op = OAND
- n.SetImplicit(true) // for walkCheckPtrArithmetic
- n.Right = nod(OBITNOT, n.Right, nil)
- n.Right = typecheck(n.Right, ctxExpr)
- n.Right = walkexpr(n.Right, init)
-
case ODIV, OMOD:
n.Left = walkexpr(n.Left, init)
n.Right = walkexpr(n.Right, init)
@@ -997,7 +989,7 @@ opswitch:
// runtime calls late in SSA processing.
if Widthreg < 8 && (et == TINT64 || et == TUINT64) {
if n.Right.Op == OLITERAL {
- // Leave div/mod by constant powers of 2.
+ // Leave div/mod by constant powers of 2 or small 16-bit constants.
// The SSA backend will handle those.
switch et {
case TINT64:
@@ -1010,6 +1002,9 @@ opswitch:
}
case TUINT64:
c := uint64(n.Right.Int64Val())
+ if c < 1<<16 {
+ break opswitch
+ }
if c != 0 && c&(c-1) == 0 {
break opswitch
}
@@ -1965,7 +1960,17 @@ func walkprint(nn *Node, init *Nodes) *Node {
on = syslook("printiface")
}
on = substArgTypes(on, n.Type) // any-1
- case TPTR, TCHAN, TMAP, TFUNC, TUNSAFEPTR:
+ case TPTR:
+ if n.Type.Elem().NotInHeap() {
+ on = syslook("printuintptr")
+ n = nod(OCONV, n, nil)
+ n.Type = types.Types[TUNSAFEPTR]
+ n = nod(OCONV, n, nil)
+ n.Type = types.Types[TUINTPTR]
+ break
+ }
+ fallthrough
+ case TCHAN, TMAP, TFUNC, TUNSAFEPTR:
on = syslook("printpointer")
on = substArgTypes(on, n.Type) // any-1
case TSLICE:
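
For a pointer whose element type is marked notinheap, walkprint now routes the value through printuintptr instead of printpointer, inserting a pointer-to-unsafe.Pointer-to-uintptr conversion chain. Written out by hand, the equivalent source-level conversion is:

package main

import (
	"fmt"
	"unsafe"
)

func main() {
	x := 42
	p := &x
	// pointer -> unsafe.Pointer -> uintptr: the value is printed as an
	// integer and never handed to the runtime as a GC-visible pointer.
	u := uintptr(unsafe.Pointer(p))
	fmt.Printf("%#x\n", u)
}
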
@@ -3609,14 +3614,20 @@ func bounded(n *Node, max int64) bool {
}
switch n.Op {
- case OAND:
+ case OAND, OANDNOT:
v := int64(-1)
- if smallintconst(n.Left) {
+ switch {
+ case smallintconst(n.Left):
v = n.Left.Int64Val()
- } else if smallintconst(n.Right) {
+ case smallintconst(n.Right):
v = n.Right.Int64Val()
+ if n.Op == OANDNOT {
+ v = ^v
+ if !sign {
+ v &= 1<<uint(bits) - 1
case t.IsInteger() && t.Size() > f.Config.RegSize:
hiName, loName := f.fe.SplitInt64(name)
newNames = append(newNames, hiName, loName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpInt64Make {
continue
}
f.NamedValues[hiName] = append(f.NamedValues[hiName], v.Args[0])
f.NamedValues[loName] = append(f.NamedValues[loName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsComplex():
rName, iName := f.fe.SplitComplex(name)
newNames = append(newNames, rName, iName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpComplexMake {
continue
}
f.NamedValues[rName] = append(f.NamedValues[rName], v.Args[0])
f.NamedValues[iName] = append(f.NamedValues[iName], v.Args[1])
-
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsString():
ptrName, lenName := f.fe.SplitString(name)
newNames = append(newNames, ptrName, lenName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpStringMake {
continue
}
f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsSlice():
ptrName, lenName, capName := f.fe.SplitSlice(name)
newNames = append(newNames, ptrName, lenName, capName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpSliceMake {
continue
}
f.NamedValues[ptrName] = append(f.NamedValues[ptrName], v.Args[0])
f.NamedValues[lenName] = append(f.NamedValues[lenName], v.Args[1])
f.NamedValues[capName] = append(f.NamedValues[capName], v.Args[2])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsInterface():
typeName, dataName := f.fe.SplitInterface(name)
newNames = append(newNames, typeName, dataName)
- for _, v := range f.NamedValues[name] {
+ for j, v := range f.NamedValues[name] {
if v.Op != OpIMake {
continue
}
f.NamedValues[typeName] = append(f.NamedValues[typeName], v.Args[0])
f.NamedValues[dataName] = append(f.NamedValues[dataName], v.Args[1])
+ toDelete = append(toDelete, namedVal{i, j})
}
- delete(f.NamedValues, name)
case t.IsFloat():
// floats are never decomposed, even ones bigger than RegSize
- newNames = append(newNames, name)
case t.Size() > f.Config.RegSize:
f.Fatalf("undecomposed named type %s %v", name, t)
- default:
- newNames = append(newNames, name)
}
}
- f.Names = newNames
+
+ deleteNamedVals(f, toDelete)
+ f.Names = append(f.Names, newNames...)
}
func decomposeBuiltInPhi(v *Value) {
@@ -263,14 +266,20 @@ func decomposeUserArrayInto(f *Func, name LocalSlot, slots []LocalSlot) []LocalS
f.Fatalf("array not of size 1")
}
elemName := f.fe.SplitArray(name)
+ var keep []*Value
for _, v := range f.NamedValues[name] {
if v.Op != OpArrayMake1 {
+ keep = append(keep, v)
continue
}
f.NamedValues[elemName] = append(f.NamedValues[elemName], v.Args[0])
}
- // delete the name for the array as a whole
- delete(f.NamedValues, name)
+ if len(keep) == 0 {
+ // delete the name for the array as a whole
+ delete(f.NamedValues, name)
+ } else {
+ f.NamedValues[name] = keep
+ }
if t.Elem().IsArray() {
return decomposeUserArrayInto(f, elemName, slots)
@@ -300,17 +309,23 @@ func decomposeUserStructInto(f *Func, name LocalSlot, slots []LocalSlot) []Local
}
makeOp := StructMakeOp(n)
+ var keep []*Value
// create named values for each struct field
for _, v := range f.NamedValues[name] {
if v.Op != makeOp {
+ keep = append(keep, v)
continue
}
for i := 0; i < len(fnames); i++ {
f.NamedValues[fnames[i]] = append(f.NamedValues[fnames[i]], v.Args[i])
}
}
- // remove the name of the struct as a whole
- delete(f.NamedValues, name)
+ if len(keep) == 0 {
+ // delete the name for the struct as a whole
+ delete(f.NamedValues, name)
+ } else {
+ f.NamedValues[name] = keep
+ }
// now that this f.NamedValues contains values for the struct
// fields, recurse into nested structs
@@ -400,3 +415,35 @@ func StructMakeOp(nf int) Op {
}
panic("too many fields in an SSAable struct")
}
+
+type namedVal struct {
+ locIndex, valIndex int // f.NamedValues[f.Names[locIndex]][valIndex] = key
+}
+
+// deleteNamedVals removes particular values with debugger names from f's naming data structures
+func deleteNamedVals(f *Func, toDelete []namedVal) {
+ // Arrange to delete from larger indices to smaller, to ensure swap-with-end deletion does not invalidate pending indices.
+ sort.Slice(toDelete, func(i, j int) bool {
+ if toDelete[i].locIndex != toDelete[j].locIndex {
+ return toDelete[i].locIndex > toDelete[j].locIndex
+ }
+ return toDelete[i].valIndex > toDelete[j].valIndex
+
+ })
+
+ // Get rid of obsolete names
+ for _, d := range toDelete {
+ loc := f.Names[d.locIndex]
+ vals := f.NamedValues[loc]
+ l := len(vals) - 1
+ if l > 0 {
+ vals[d.valIndex] = vals[l]
+ f.NamedValues[loc] = vals[:l]
+ } else {
+ delete(f.NamedValues, loc)
+ l = len(f.Names) - 1
+ f.Names[d.locIndex] = f.Names[l]
+ f.Names = f.Names[:l]
+ }
+ }
+}
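
deleteNamedVals relies on swap-with-end removal, which is why the pending deletions are sorted from larger indices to smaller: removing a small index first would move the current last element into that slot and leave any larger pending index pointing at the wrong (or a vanished) entry. A minimal sketch of the same discipline on a plain slice:

package main

import (
	"fmt"
	"sort"
)

func deleteAt(s []int, idx []int) []int {
	// Process the largest indices first so earlier swaps never disturb
	// positions that still have to be deleted.
	sort.Sort(sort.Reverse(sort.IntSlice(idx)))
	for _, i := range idx {
		last := len(s) - 1
		s[i] = s[last]
		s = s[:last]
	}
	return s
}

func main() {
	fmt.Println(deleteAt([]int{10, 11, 12, 13, 14}, []int{1, 3}))
	// [10 14 12] — order is not preserved, which is fine for these tables
}
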
diff --git a/src/cmd/compile/internal/ssa/expand_calls.go b/src/cmd/compile/internal/ssa/expand_calls.go
index bbd9aeee512..fbde19d94c2 100644
--- a/src/cmd/compile/internal/ssa/expand_calls.go
+++ b/src/cmd/compile/internal/ssa/expand_calls.go
@@ -11,27 +11,49 @@ import (
"sort"
)
+type selKey struct {
+ from *Value
+ offset int64
+ size int64
+ typ *types.Type
+}
+
+type offsetKey struct {
+ from *Value
+ offset int64
+ pt *types.Type
+}
+
// expandCalls converts LE (Late Expansion) calls that act like they receive value args into a lower-level form
// that is more oriented to a platform's ABI. The SelectN operations that extract results are rewritten into
// more appropriate forms, and any StructMake or ArrayMake inputs are decomposed until non-struct values are
-// reached (for now, Strings, Slices, Complex, and Interface are not decomposed because they are rewritten in
-// a subsequent phase, but that may need to change for a register ABI in case one of those composite values is
-// split between registers and memory).
-//
-// TODO: when it comes time to use registers, might want to include builtin selectors as well, but currently that happens in lower.
+// reached. On the callee side, OpArg nodes are not decomposed until this phase is run.
+// TODO results should not be lowered until this phase.
func expandCalls(f *Func) {
+ // Calls that need lowering have some number of inputs, including a memory input,
+ // and produce a tuple of (value1, value2, ..., mem) where valueK may or may not be SSA-able.
+
+ // With the current ABI those inputs need to be converted into stores to memory,
+ // rethreading the call's memory input to the first, and the new call now receiving the last.
+
+ // With the current ABI, the outputs need to be converted to loads, which will all use the call's
+ // memory output as their input.
if !LateCallExpansionEnabledWithin(f) {
return
}
+ debug := f.pass.debug > 0
+
+ if debug {
+ fmt.Printf("\nexpandsCalls(%s)\n", f.Name)
+ }
+
canSSAType := f.fe.CanSSA
regSize := f.Config.RegSize
sp, _ := f.spSb()
+ typ := &f.Config.Types
+ ptrSize := f.Config.PtrSize
- debug := f.pass.debug > 0
-
- // For 32-bit, need to deal with decomposition of 64-bit integers
- tUint32 := types.Types[types.TUINT32]
- tInt32 := types.Types[types.TINT32]
+ // For 32-bit, need to deal with decomposition of 64-bit integers, which depends on endianness.
var hiOffset, lowOffset int64
if f.Config.BigEndian {
lowOffset = 4
@@ -39,25 +61,68 @@ func expandCalls(f *Func) {
hiOffset = 4
}
+ namedSelects := make(map[*Value][]namedVal)
+
+ sdom := f.Sdom()
+
+ common := make(map[selKey]*Value)
+
// intPairTypes returns the pair of 32-bit int types needed to encode a 64-bit integer type on a target
// that has no 64-bit integer registers.
intPairTypes := func(et types.EType) (tHi, tLo *types.Type) {
- tHi = tUint32
+ tHi = typ.UInt32
if et == types.TINT64 {
- tHi = tInt32
+ tHi = typ.Int32
}
- tLo = tUint32
+ tLo = typ.UInt32
return
}
// isAlreadyExpandedAggregateType returns whether a type is an SSA-able "aggregate" (multiple register) type
- // that was expanded in an earlier phase (small user-defined arrays and structs, lowered in decomposeUser).
- // Other aggregate types are expanded in decomposeBuiltin, which comes later.
+ // that was expanded in an earlier phase (currently, expand_calls is intended to run after decomposeBuiltin,
+ // so this is all aggregate types -- small struct and array, complex, interface, string, slice, and 64-bit
+ // integer on 32-bit).
isAlreadyExpandedAggregateType := func(t *types.Type) bool {
if !canSSAType(t) {
return false
}
- return t.IsStruct() || t.IsArray() || regSize == 4 && t.Size() > 4 && t.IsInteger()
+ return t.IsStruct() || t.IsArray() || t.IsComplex() || t.IsInterface() || t.IsString() || t.IsSlice() ||
+ t.Size() > regSize && t.IsInteger()
+ }
+
+ offsets := make(map[offsetKey]*Value)
+
+ // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
+ // TODO should also optimize offsets from SB?
+ offsetFrom := func(from *Value, offset int64, pt *types.Type) *Value {
+ if offset == 0 && from.Type == pt { // this is not actually likely
+ return from
+ }
+ // Simplify, canonicalize
+ for from.Op == OpOffPtr {
+ offset += from.AuxInt
+ from = from.Args[0]
+ }
+ if from == sp {
+ return f.ConstOffPtrSP(pt, offset, sp)
+ }
+ key := offsetKey{from, offset, pt}
+ v := offsets[key]
+ if v != nil {
+ return v
+ }
+ v = from.Block.NewValue1I(from.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
+ offsets[key] = v
+ return v
+ }
+
+ // splitSlots splits one "field" (specified by sfx, offset, and ty) out of the LocalSlots in ls and returns the new LocalSlots this generates.
+ splitSlots := func(ls []LocalSlot, sfx string, offset int64, ty *types.Type) []LocalSlot {
+ var locs []LocalSlot
+ for i := range ls {
+ locs = append(locs, f.fe.SplitSlot(&ls[i], sfx, offset, ty))
+ }
+ return locs
}
// removeTrivialWrapperTypes unwraps layers of
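
offsetFrom canonicalizes pointer arithmetic before memoizing it: chains of OpOffPtr are folded down to a single offset from the root pointer, and the cache ensures one value per distinct address expression. A plain-Go sketch of the same idea, keyed here only by (base, offset) for brevity; ptrExpr stands in for an SSA value and is not a compiler type.

package main

import "fmt"

type ptrExpr struct {
	base   *ptrExpr // nil means a root pointer such as SP
	offset int64
	name   string
}

type key struct {
	root   *ptrExpr
	offset int64
}

var memo = map[key]*ptrExpr{}

func offsetFrom(from *ptrExpr, offset int64) *ptrExpr {
	// Canonicalize: fold chained offsets down to the root pointer.
	for from.base != nil {
		offset += from.offset
		from = from.base
	}
	k := key{from, offset}
	if v, ok := memo[k]; ok {
		return v // reuse the previously built node
	}
	v := &ptrExpr{base: from, offset: offset, name: fmt.Sprintf("%s+%d", from.name, offset)}
	memo[k] = v
	return v
}

func main() {
	sp := &ptrExpr{name: "SP"}
	a := offsetFrom(sp, 8)
	b := offsetFrom(offsetFrom(sp, 4), 4) // chained offsets collapse to SP+8
	fmt.Println(a == b, a.name)           // true SP+8
}
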
@@ -92,14 +157,101 @@ func expandCalls(f *Func) {
// With the current ABI, the outputs need to be converted to loads, which will all use the call's
// memory output as their input.
- // rewriteSelect recursively walks leaf selector to a root (OpSelectN) through
- // a chain of Struct/Array Select operations. If the chain of selectors does not
- // end in OpSelectN, it does nothing (this can happen depending on compiler phase ordering).
- // It emits the code necessary to implement the leaf select operation that leads to the call.
+ // rewriteSelect recursively walks from leaf selector to a root (OpSelectN, OpLoad, OpArg)
+ // through a chain of Struct/Array/builtin Select operations. If the chain of selectors does not
+ // end in an expected root, it does nothing (this can happen depending on compiler phase ordering).
+ // The "leaf" provides the type, the root supplies the container, and the leaf-to-root path
+ // accumulates the offset.
+ // It emits the code necessary to implement the leaf select operation that leads to the root.
+ //
// TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
- var rewriteSelect func(leaf *Value, selector *Value, offset int64)
- rewriteSelect = func(leaf *Value, selector *Value, offset int64) {
+ var rewriteSelect func(leaf *Value, selector *Value, offset int64) []LocalSlot
+ rewriteSelect = func(leaf *Value, selector *Value, offset int64) []LocalSlot {
+ if debug {
+ fmt.Printf("rewriteSelect(%s, %s, %d)\n", leaf.LongString(), selector.LongString(), offset)
+ }
+ var locs []LocalSlot
+ leafType := leaf.Type
+ if len(selector.Args) > 0 {
+ w := selector.Args[0]
+ if w.Op == OpCopy {
+ for w.Op == OpCopy {
+ w = w.Args[0]
+ }
+ selector.SetArg(0, w)
+ }
+ }
switch selector.Op {
+ case OpArg:
+ if !isAlreadyExpandedAggregateType(selector.Type) {
+ if leafType == selector.Type { // OpIData leads us here, sometimes.
+ leaf.copyOf(selector)
+ } else {
+ f.Fatalf("Unexpected OpArg type, selector=%s, leaf=%s\n", selector.LongString(), leaf.LongString())
+ }
+ if debug {
+ fmt.Printf("\tOpArg, break\n")
+ }
+ break
+ }
+ if leaf.Op == OpIData {
+ leafType = removeTrivialWrapperTypes(leaf.Type)
+ }
+ aux := selector.Aux
+ auxInt := selector.AuxInt + offset
+ if leaf.Block == selector.Block {
+ leaf.reset(OpArg)
+ leaf.Aux = aux
+ leaf.AuxInt = auxInt
+ leaf.Type = leafType
+ } else {
+ w := selector.Block.NewValue0IA(leaf.Pos, OpArg, leafType, auxInt, aux)
+ leaf.copyOf(w)
+ if debug {
+ fmt.Printf("\tnew %s\n", w.LongString())
+ }
+ }
+ for _, s := range namedSelects[selector] {
+ locs = append(locs, f.Names[s.locIndex])
+ }
+
+ case OpLoad: // We end up here because of IData of immediate structures.
+ // Failure case:
+ // (note the failure case is very rare; w/o this case, make.bash and run.bash both pass, as well as
+ // the hard cases of building {syscall,math,math/cmplx,math/bits,go/constant} on ppc64le and mips-softfloat).
+ //
+ // GOSSAFUNC='(*dumper).dump' go build -gcflags=-l -tags=math_big_pure_go cmd/compile/internal/gc
+ // cmd/compile/internal/gc/dump.go:136:14: internal compiler error: '(*dumper).dump': not lowered: v827, StructSelect PTR PTR
+ // b2: ← b1
+ // v20 (+142) = StaticLECall <interface {},mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v8 v1
+ // v21 (142) = SelectN <mem> [1] v20
+ // v22 (142) = SelectN <interface {}> [0] v20
+ // b15: ← b8
+ // v71 (+143) = IData <Nodes> v22 (v[Nodes])
+ // v73 (+146) = StaticLECall <[]*Node,mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v71 v21
+ //
+ // translates (w/o the "case OpLoad:" above) to:
+ //
+ // b2: ← b1
+ // v20 (+142) = StaticCall <mem> {AuxCall{reflect.Value.Interface([reflect.Value,0])[interface {},24]}} [40] v715
+ // v23 (142) = Load <*uintptr> v19 v20
+ // v823 (142) = IsNonNil <bool> v23
+ // v67 (+143) = Load <*[]*Node> v880 v20
+ // b15: ← b8
+ // v827 (146) = StructSelect <*[]*Node> [0] v67
+ // v846 (146) = Store <mem> {*[]*Node} v769 v827 v20
+ // v73 (+146) = StaticCall <mem> {AuxCall{"".Nodes.Slice([Nodes,0])[[]*Node,8]}} [32] v846
+ // i.e., the struct select is generated and remains in because it is not applied to an actual structure.
+ // The OpLoad was created to load the single field of the IData
+ // This case removes that StructSelect.
+ if leafType != selector.Type {
+ f.Fatalf("Unexpected Load as selector, leaf=%s, selector=%s\n", leaf.LongString(), selector.LongString())
+ }
+ leaf.copyOf(selector)
+ for _, s := range namedSelects[selector] {
+ locs = append(locs, f.Names[s.locIndex])
+ }
+
case OpSelectN:
// TODO these may be duplicated. Should memoize. Intermediate selectors will go dead, no worries there.
call := selector.Args[0]
@@ -110,9 +262,9 @@ func expandCalls(f *Func) {
leaf.copyOf(call)
} else {
leafType := removeTrivialWrapperTypes(leaf.Type)
- pt := types.NewPtr(leafType)
if canSSAType(leafType) {
- off := f.ConstOffPtrSP(pt, offset+aux.OffsetOfResult(which), sp)
+ pt := types.NewPtr(leafType)
+ off := offsetFrom(sp, offset+aux.OffsetOfResult(which), pt)
// Any selection right out of the arg area/registers has to be same Block as call, use call as mem input.
if leaf.Block == call.Block {
leaf.reset(OpLoad)
@@ -121,118 +273,268 @@ func expandCalls(f *Func) {
} else {
w := call.Block.NewValue2(leaf.Pos, OpLoad, leafType, off, call)
leaf.copyOf(w)
+ if debug {
+ fmt.Printf("\tnew %s\n", w.LongString())
+ }
+ }
+ for _, s := range namedSelects[selector] {
+ locs = append(locs, f.Names[s.locIndex])
}
} else {
- panic("Should not have non-SSA-able OpSelectN")
+ f.Fatalf("Should not have non-SSA-able OpSelectN, selector=%s", selector.LongString())
}
}
+
case OpStructSelect:
w := selector.Args[0]
- if w.Type.Etype != types.TSTRUCT {
- fmt.Printf("Bad type for w:\nv=%v\nsel=%v\nw=%v\n,f=%s\n", leaf.LongString(), selector.LongString(), w.LongString(), f.Name)
+ var ls []LocalSlot
+ if w.Type.Etype != types.TSTRUCT { // IData artifact
+ ls = rewriteSelect(leaf, w, offset)
+ } else {
+ ls = rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
+ if w.Op != OpIData {
+ for _, l := range ls {
+ locs = append(locs, f.fe.SplitStruct(l, int(selector.AuxInt)))
+ }
+ }
}
- rewriteSelect(leaf, w, offset+w.Type.FieldOff(int(selector.AuxInt)))
-
- case OpInt64Hi:
- w := selector.Args[0]
- rewriteSelect(leaf, w, offset+hiOffset)
-
- case OpInt64Lo:
- w := selector.Args[0]
- rewriteSelect(leaf, w, offset+lowOffset)
case OpArraySelect:
w := selector.Args[0]
rewriteSelect(leaf, w, offset+selector.Type.Size()*selector.AuxInt)
+
+ case OpInt64Hi:
+ w := selector.Args[0]
+ ls := rewriteSelect(leaf, w, offset+hiOffset)
+ locs = splitSlots(ls, ".hi", hiOffset, leafType)
+
+ case OpInt64Lo:
+ w := selector.Args[0]
+ ls := rewriteSelect(leaf, w, offset+lowOffset)
+ locs = splitSlots(ls, ".lo", lowOffset, leafType)
+
+ case OpStringPtr:
+ ls := rewriteSelect(leaf, selector.Args[0], offset)
+ locs = splitSlots(ls, ".ptr", 0, typ.BytePtr)
+
+ case OpSlicePtr:
+ w := selector.Args[0]
+ ls := rewriteSelect(leaf, w, offset)
+ locs = splitSlots(ls, ".ptr", 0, types.NewPtr(w.Type.Elem()))
+
+ case OpITab:
+ w := selector.Args[0]
+ ls := rewriteSelect(leaf, w, offset)
+ sfx := ".itab"
+ if w.Type.IsEmptyInterface() {
+ sfx = ".type"
+ }
+ locs = splitSlots(ls, sfx, 0, typ.Uintptr)
+
+ case OpComplexReal:
+ ls := rewriteSelect(leaf, selector.Args[0], offset)
+ locs = splitSlots(ls, ".real", 0, leafType)
+
+ case OpComplexImag:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+leafType.Width) // result is FloatNN, width of result is offset of imaginary part.
+ locs = splitSlots(ls, ".imag", leafType.Width, leafType)
+
+ case OpStringLen, OpSliceLen:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+ locs = splitSlots(ls, ".len", ptrSize, leafType)
+
+ case OpIData:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+ptrSize)
+ locs = splitSlots(ls, ".data", ptrSize, leafType)
+
+ case OpSliceCap:
+ ls := rewriteSelect(leaf, selector.Args[0], offset+2*ptrSize)
+ locs = splitSlots(ls, ".cap", 2*ptrSize, leafType)
+
+ case OpCopy: // If it's an intermediate result, recurse
+ locs = rewriteSelect(leaf, selector.Args[0], offset)
+ for _, s := range namedSelects[selector] {
+ // this copy may have had its own name, preserve that, too.
+ locs = append(locs, f.Names[s.locIndex])
+ }
+
default:
- // Ignore dead ends; on 32-bit, these can occur running before decompose builtins.
+ // Ignore dead ends. These can occur if this phase is run before decompose builtin (which is not intended, but allowed).
}
+
+ return locs
}
- // storeArg converts stores of SSA-able aggregate arguments (passed to a call) into a series of stores of
- // smaller types into individual parameter slots.
- // TODO when registers really arrive, must also decompose anything split across two registers or registers and memory.
- var storeArg func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value
- storeArg = func(pos src.XPos, b *Block, a *Value, t *types.Type, offset int64, mem *Value) *Value {
- switch a.Op {
- case OpArrayMake0, OpStructMake0:
- return mem
- case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
- for i := 0; i < t.NumFields(); i++ {
- fld := t.Field(i)
- mem = storeArg(pos, b, a.Args[i], fld.Type, offset+fld.Offset, mem)
+ // storeArgOrLoad converts stores of SSA-able aggregate arguments (passed to a call) into a series of primitive-typed
+ // stores of non-aggregate types. It recursively walks up a chain of selectors until it reaches a Load or an Arg.
+ // If it does not reach a Load or an Arg, nothing happens; this allows a little freedom in phase ordering.
+ var storeArgOrLoad func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value
+
+ // decomposeArgOrLoad is a helper for storeArgOrLoad.
+ // It decomposes a Load or an Arg into smaller parts, parameterized by the decomposeOne and decomposeTwo functions
+ // passed to it, and returns the new mem. If the type does not match one of the expected aggregate types, it returns nil instead.
+ decomposeArgOrLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64,
+ decomposeOne func(pos src.XPos, b *Block, base, source, mem *Value, t1 *types.Type, offArg, offStore int64) *Value,
+ decomposeTwo func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value) *Value {
+ u := source.Type
+ switch u.Etype {
+ case types.TARRAY:
+ elem := u.Elem()
+ for i := int64(0); i < u.NumElem(); i++ {
+ elemOff := i * elem.Size()
+ mem = decomposeOne(pos, b, base, source, mem, elem, source.AuxInt+elemOff, offset+elemOff)
+ pos = pos.WithNotStmt()
+ }
+ return mem
+ case types.TSTRUCT:
+ for i := 0; i < u.NumFields(); i++ {
+ fld := u.Field(i)
+ mem = decomposeOne(pos, b, base, source, mem, fld.Type, source.AuxInt+fld.Offset, offset+fld.Offset)
+ pos = pos.WithNotStmt()
}
return mem
- case OpArrayMake1:
- return storeArg(pos, b, a.Args[0], t.Elem(), offset, mem)
-
- case OpInt64Make:
- tHi, tLo := intPairTypes(t.Etype)
- mem = storeArg(pos, b, a.Args[0], tHi, offset+hiOffset, mem)
- return storeArg(pos, b, a.Args[1], tLo, offset+lowOffset, mem)
- }
- dst := f.ConstOffPtrSP(types.NewPtr(t), offset, sp)
- x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, a, mem)
- if debug {
- fmt.Printf("storeArg(%v) returns %s\n", a, x.LongString())
- }
- return x
- }
-
- // offsetFrom creates an offset from a pointer, simplifying chained offsets and offsets from SP
- // TODO should also optimize offsets from SB?
- offsetFrom := func(dst *Value, offset int64, t *types.Type) *Value {
- pt := types.NewPtr(t)
- if offset == 0 && dst.Type == pt { // this is not actually likely
- return dst
- }
- if dst.Op != OpOffPtr {
- return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, dst)
- }
- // Simplify OpOffPtr
- from := dst.Args[0]
- offset += dst.AuxInt
- if from == sp {
- return f.ConstOffPtrSP(pt, offset, sp)
- }
- return dst.Block.NewValue1I(dst.Pos.WithNotStmt(), OpOffPtr, pt, offset, from)
- }
-
- // splitStore converts a store of an SSA-able aggregate into a series of smaller stores, emitting
- // appropriate Struct/Array Select operations (which will soon go dead) to obtain the parts.
- var splitStore func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value
- splitStore = func(dst, src, mem, v *Value, t *types.Type, offset int64, firstStorePos src.XPos) *Value {
- // TODO might be worth commoning up duplicate selectors, but since they go dead, maybe no point.
- pos := v.Pos.WithNotStmt()
- switch t.Etype {
case types.TINT64, types.TUINT64:
if t.Width == regSize {
break
}
tHi, tLo := intPairTypes(t.Etype)
- sel := src.Block.NewValue1(pos, OpInt64Hi, tHi, src)
- mem = splitStore(dst, sel, mem, v, tHi, offset+hiOffset, firstStorePos)
- firstStorePos = firstStorePos.WithNotStmt()
- sel = src.Block.NewValue1(pos, OpInt64Lo, tLo, src)
- return splitStore(dst, sel, mem, v, tLo, offset+lowOffset, firstStorePos)
+ mem = decomposeOne(pos, b, base, source, mem, tHi, source.AuxInt+hiOffset, offset+hiOffset)
+ pos = pos.WithNotStmt()
+ return decomposeOne(pos, b, base, source, mem, tLo, source.AuxInt+lowOffset, offset+lowOffset)
+ case types.TINTER:
+ return decomposeTwo(pos, b, base, source, mem, typ.Uintptr, typ.BytePtr, source.AuxInt, offset)
+ case types.TSTRING:
+ return decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+ case types.TCOMPLEX64:
+ return decomposeTwo(pos, b, base, source, mem, typ.Float32, typ.Float32, source.AuxInt, offset)
+ case types.TCOMPLEX128:
+ return decomposeTwo(pos, b, base, source, mem, typ.Float64, typ.Float64, source.AuxInt, offset)
+ case types.TSLICE:
+ mem = decomposeTwo(pos, b, base, source, mem, typ.BytePtr, typ.Int, source.AuxInt, offset)
+ return decomposeOne(pos, b, base, source, mem, typ.Int, source.AuxInt+2*ptrSize, offset+2*ptrSize)
+ }
+ return nil
+ }
- case types.TARRAY:
- elt := t.Elem()
- if src.Op == OpIData && t.NumElem() == 1 && t.Width == regSize && elt.Width == regSize {
- t = removeTrivialWrapperTypes(t)
- if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
- f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
- }
- break // handle the leaf type.
+ // storeOneArg creates a decomposed (one step) arg that is then stored.
+ // pos and b locate the store instruction, base is the base of the store target, source is the "base" of the value input,
+ // mem is the input mem, t is the type in question, and offArg and offStore are the offsets from the respective bases.
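+	// The smaller Args created here are memoized in the common map (keyed by source, offset, width, and type)
+	// so that repeated decompositions of the same Arg reuse a single value.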
+ storeOneArg := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+ w := common[selKey{source, offArg, t.Width, t}]
+ if w == nil {
+ w = source.Block.NewValue0IA(source.Pos, OpArg, t, offArg, source.Aux)
+ common[selKey{source, offArg, t.Width, t}] = w
+ }
+ return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+ }
+
+ // storeOneLoad creates a decomposed (one step) load that is then stored.
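+	// The part is loaded from the original load's address plus offArg, then stored via storeArgOrLoad.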
+ storeOneLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offArg, offStore int64) *Value {
+ from := offsetFrom(source.Args[0], offArg, types.NewPtr(t))
+ w := source.Block.NewValue2(source.Pos, OpLoad, t, from, mem)
+ return storeArgOrLoad(pos, b, base, w, mem, t, offStore)
+ }
+
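+	// storeTwoArg decomposes a two-part aggregate (e.g. interface, string, complex) sourced from an Arg,
+	// storing the t1 part at offStore and the t2 part immediately after it, at offStore+t1.Size().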
+ storeTwoArg := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+ mem = storeOneArg(pos, b, base, source, mem, t1, offArg, offStore)
+ pos = pos.WithNotStmt()
+ t1Size := t1.Size()
+ return storeOneArg(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+ }
+
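+	// storeTwoLoad is the analogue of storeTwoArg for a two-part aggregate sourced from a Load.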
+ storeTwoLoad := func(pos src.XPos, b *Block, base, source, mem *Value, t1, t2 *types.Type, offArg, offStore int64) *Value {
+ mem = storeOneLoad(pos, b, base, source, mem, t1, offArg, offStore)
+ pos = pos.WithNotStmt()
+ t1Size := t1.Size()
+ return storeOneLoad(pos, b, base, source, mem, t2, offArg+t1Size, offStore+t1Size)
+ }
+
+ storeArgOrLoad = func(pos src.XPos, b *Block, base, source, mem *Value, t *types.Type, offset int64) *Value {
+ if debug {
+ fmt.Printf("\tstoreArgOrLoad(%s; %s; %s; %s; %d)\n", base.LongString(), source.LongString(), mem.String(), t.String(), offset)
+ }
+
+ switch source.Op {
+ case OpCopy:
+ return storeArgOrLoad(pos, b, base, source.Args[0], mem, t, offset)
+
+ case OpLoad:
+ ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneLoad, storeTwoLoad)
+ if ret != nil {
+ return ret
}
- for i := int64(0); i < t.NumElem(); i++ {
- sel := src.Block.NewValue1I(pos, OpArraySelect, elt, i, src)
- mem = splitStore(dst, sel, mem, v, elt, offset+i*elt.Width, firstStorePos)
- firstStorePos = firstStorePos.WithNotStmt()
+
+ case OpArg:
+ ret := decomposeArgOrLoad(pos, b, base, source, mem, t, offset, storeOneArg, storeTwoArg)
+ if ret != nil {
+ return ret
+ }
+
+ case OpArrayMake0, OpStructMake0:
+ return mem
+
+ case OpStructMake1, OpStructMake2, OpStructMake3, OpStructMake4:
+ for i := 0; i < t.NumFields(); i++ {
+ fld := t.Field(i)
+ mem = storeArgOrLoad(pos, b, base, source.Args[i], mem, fld.Type, offset+fld.Offset)
+ pos = pos.WithNotStmt()
}
return mem
+
+ case OpArrayMake1:
+ return storeArgOrLoad(pos, b, base, source.Args[0], mem, t.Elem(), offset)
+
+ case OpInt64Make:
+ tHi, tLo := intPairTypes(t.Etype)
+ mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tHi, offset+hiOffset)
+ pos = pos.WithNotStmt()
+ return storeArgOrLoad(pos, b, base, source.Args[1], mem, tLo, offset+lowOffset)
+
+ case OpComplexMake:
+ tPart := typ.Float32
+ wPart := t.Width / 2
+ if wPart == 8 {
+ tPart = typ.Float64
+ }
+ mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, tPart, offset)
+ pos = pos.WithNotStmt()
+ return storeArgOrLoad(pos, b, base, source.Args[1], mem, tPart, offset+wPart)
+
+ case OpIMake:
+ mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.Uintptr, offset)
+ pos = pos.WithNotStmt()
+ return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.BytePtr, offset+ptrSize)
+
+ case OpStringMake:
+ mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+ pos = pos.WithNotStmt()
+ return storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
+
+ case OpSliceMake:
+ mem = storeArgOrLoad(pos, b, base, source.Args[0], mem, typ.BytePtr, offset)
+ pos = pos.WithNotStmt()
+ mem = storeArgOrLoad(pos, b, base, source.Args[1], mem, typ.Int, offset+ptrSize)
+ return storeArgOrLoad(pos, b, base, source.Args[2], mem, typ.Int, offset+2*ptrSize)
+ }
+
+ // For nodes that cannot be taken apart -- OpSelectN, other structure selectors.
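+	// Here the value is decomposed by its type: the appropriate selector ops (StructSelect, ArraySelect,
+	// StringPtr/StringLen, SlicePtr/SliceLen/SliceCap, ITab/IData, ComplexReal/ComplexImag, Int64Hi/Int64Lo)
+	// are emitted and storeArgOrLoad recurses on each part.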
+ switch t.Etype {
+ case types.TARRAY:
+ elt := t.Elem()
+ if source.Type != t && t.NumElem() == 1 && elt.Width == t.Width && t.Width == regSize {
+ t = removeTrivialWrapperTypes(t)
+ // it could be a leaf type, but the "leaf" could be complex64 (for example)
+ return storeArgOrLoad(pos, b, base, source, mem, t, offset)
+ }
+ for i := int64(0); i < t.NumElem(); i++ {
+ sel := source.Block.NewValue1I(pos, OpArraySelect, elt, i, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, elt, offset+i*elt.Width)
+ pos = pos.WithNotStmt()
+ }
+ return mem
+
case types.TSTRUCT:
- if src.Op == OpIData && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
+ if source.Type != t && t.NumFields() == 1 && t.Field(0).Type.Width == t.Width && t.Width == regSize {
// This peculiar test deals with accesses to immediate interface data.
// It works okay because everything is the same size.
// Example code that triggers this can be found in go/constant/value.go, function ToComplex
@@ -240,25 +542,84 @@ func expandCalls(f *Func) {
// v121 (+882) = StaticLECall {AuxCall{"".itof([intVal,0])[floatVal,8]}} [16] v119 v1
// This corresponds to the generic rewrite rule "(StructSelect [0] (IData x)) => (IData x)"
// Guard against "struct{struct{*foo}}"
+ // Other rewriting phases create minor glitches when they transform IData, for instance the
+ // interface-typed Arg "x" of ToFloat in go/constant/value.go
+ // v6 (858) = Arg {x} (x[Value], x[Value])
+ // is rewritten by decomposeArgs into
+ // v141 (858) = Arg {x}
+ // v139 (858) = Arg <*uint8> {x} [8]
+ // because of a type case clause on line 862 of go/constant/value.go
+ // case intVal:
+ // return itof(x)
+			// v139 is later stored as an intVal == struct{val *big.Int}, which naively requires the fields
+			// of a *uint8, which does not succeed.
t = removeTrivialWrapperTypes(t)
- if t.Etype == types.TSTRUCT || t.Etype == types.TARRAY {
- f.Fatalf("Did not expect to find IDATA-immediate with non-trivial struct/array in it")
- }
- break // handle the leaf type.
+ // it could be a leaf type, but the "leaf" could be complex64 (for example)
+ return storeArgOrLoad(pos, b, base, source, mem, t, offset)
}
+
for i := 0; i < t.NumFields(); i++ {
fld := t.Field(i)
- sel := src.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), src)
- mem = splitStore(dst, sel, mem, v, fld.Type, offset+fld.Offset, firstStorePos)
- firstStorePos = firstStorePos.WithNotStmt()
+ sel := source.Block.NewValue1I(pos, OpStructSelect, fld.Type, int64(i), source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, fld.Type, offset+fld.Offset)
+ pos = pos.WithNotStmt()
}
return mem
+
+ case types.TINT64, types.TUINT64:
+ if t.Width == regSize {
+ break
+ }
+ tHi, tLo := intPairTypes(t.Etype)
+ sel := source.Block.NewValue1(pos, OpInt64Hi, tHi, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, tHi, offset+hiOffset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpInt64Lo, tLo, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, tLo, offset+lowOffset)
+
+ case types.TINTER:
+ sel := source.Block.NewValue1(pos, OpITab, typ.BytePtr, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpIData, typ.BytePtr, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset+ptrSize)
+
+ case types.TSTRING:
+ sel := source.Block.NewValue1(pos, OpStringPtr, typ.BytePtr, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, typ.BytePtr, offset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpStringLen, typ.Int, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
+
+ case types.TSLICE:
+ et := types.NewPtr(t.Elem())
+ sel := source.Block.NewValue1(pos, OpSlicePtr, et, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, et, offset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpSliceLen, typ.Int, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+ptrSize)
+ sel = source.Block.NewValue1(pos, OpSliceCap, typ.Int, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, typ.Int, offset+2*ptrSize)
+
+ case types.TCOMPLEX64:
+ sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float32, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float32, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, typ.Float32, offset+4)
+
+ case types.TCOMPLEX128:
+ sel := source.Block.NewValue1(pos, OpComplexReal, typ.Float64, source)
+ mem = storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset)
+ pos = pos.WithNotStmt()
+ sel = source.Block.NewValue1(pos, OpComplexImag, typ.Float64, source)
+ return storeArgOrLoad(pos, b, base, sel, mem, typ.Float64, offset+8)
}
- // Default, including for aggregates whose single element exactly fills their container
- // TODO this will be a problem for cast interfaces containing floats when we move to registers.
- x := v.Block.NewValue3A(firstStorePos, OpStore, types.TypeMem, t, offsetFrom(dst, offset, t), src, mem)
+
+ dst := offsetFrom(base, offset, types.NewPtr(t))
+ x := b.NewValue3A(pos, OpStore, types.TypeMem, t, dst, source, mem)
if debug {
- fmt.Printf("splitStore(%v, %v, %v, %v) returns %s\n", dst, src, mem, v, x.LongString())
+ fmt.Printf("\t\tstoreArg returns %s\n", x.LongString())
}
return x
}
@@ -286,28 +647,33 @@ func expandCalls(f *Func) {
}
// "Dereference" of addressed (probably not-SSA-eligible) value becomes Move
// TODO this will be more complicated with registers in the picture.
- src := a.Args[0]
- dst := f.ConstOffPtrSP(src.Type, aux.OffsetOfArg(auxI), sp)
+ source := a.Args[0]
+ dst := f.ConstOffPtrSP(source.Type, aux.OffsetOfArg(auxI), sp)
if a.Uses == 1 && a.Block == v.Block {
a.reset(OpMove)
a.Pos = pos
a.Type = types.TypeMem
a.Aux = aux.TypeOfArg(auxI)
a.AuxInt = aux.SizeOfArg(auxI)
- a.SetArgs3(dst, src, mem)
+ a.SetArgs3(dst, source, mem)
mem = a
} else {
- mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, src, mem)
+ mem = v.Block.NewValue3A(pos, OpMove, types.TypeMem, aux.TypeOfArg(auxI), dst, source, mem)
mem.AuxInt = aux.SizeOfArg(auxI)
}
} else {
- mem = storeArg(pos, v.Block, a, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI), mem)
+ if debug {
+ fmt.Printf("storeArg %s, %v, %d\n", a.LongString(), aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
+ }
+ mem = storeArgOrLoad(pos, v.Block, sp, a, mem, aux.TypeOfArg(auxI), aux.OffsetOfArg(auxI))
}
}
v.resetArgs()
return mem
}
+	// TODO if too slow, the whole-program iteration can be replaced with slices of appropriate values, accumulated in the first loop here.
+
// Step 0: rewrite the calls to convert incoming args to stores.
for _, b := range f.Blocks {
for _, v := range b.Values {
@@ -328,15 +694,42 @@ func expandCalls(f *Func) {
}
}
- // Step 1: any stores of aggregates remaining are believed to be sourced from call results.
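+	// Record the f.Names entries of aggregate-typed SelectN and Arg values; once the selectors are rewritten,
+	// these non-leaf names are deleted (see deleteNamedVals below) in favor of the leaf names produced by rewriteSelect.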
+ for i, name := range f.Names {
+ t := name.Type
+ if isAlreadyExpandedAggregateType(t) {
+ for j, v := range f.NamedValues[name] {
+ if v.Op == OpSelectN || v.Op == OpArg && isAlreadyExpandedAggregateType(v.Type) {
+ ns := namedSelects[v]
+ namedSelects[v] = append(ns, namedVal{locIndex: i, valIndex: j})
+ }
+ }
+ }
+ }
+
+ // Step 1: any stores of aggregates remaining are believed to be sourced from call results or args.
// Decompose those stores into a series of smaller stores, adding selection ops as necessary.
for _, b := range f.Blocks {
for _, v := range b.Values {
if v.Op == OpStore {
t := v.Aux.(*types.Type)
- if isAlreadyExpandedAggregateType(t) {
- dst, src, mem := v.Args[0], v.Args[1], v.Args[2]
- mem = splitStore(dst, src, mem, v, t, 0, v.Pos)
+ source := v.Args[1]
+ tSrc := source.Type
+ iAEATt := isAlreadyExpandedAggregateType(t)
+
+ if !iAEATt {
+					// Guard against storing an immediate struct into an interface data field -- the declared store type is *uint8.
+ // TODO can this happen recursively?
+ iAEATt = isAlreadyExpandedAggregateType(tSrc)
+ if iAEATt {
+ t = tSrc
+ }
+ }
+ if iAEATt {
+ if debug {
+ fmt.Printf("Splitting store %s\n", v.LongString())
+ }
+ dst, mem := v.Args[0], v.Args[2]
+ mem = storeArgOrLoad(v.Pos, b, dst, source, mem, t, 0)
v.copyOf(mem)
}
}
@@ -345,23 +738,32 @@ func expandCalls(f *Func) {
val2Preds := make(map[*Value]int32) // Used to accumulate dependency graph of selection operations for topological ordering.
- // Step 2: accumulate selection operations for rewrite in topological order.
+ // Step 2: transform or accumulate selection operations for rewrite in topological order.
+ //
+ // Aggregate types that have already (in earlier phases) been transformed must be lowered comprehensively to finish
+ // the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures),
+	// the transformation (user-defined structs and arrays, slices, strings, interfaces, complex, 64-bit on 32-bit architectures).
// Any select-for-addressing applied to call results can be transformed directly.
- // TODO this is overkill; with the transformation of aggregate references into series of leaf references, it is only necessary to remember and recurse on the leaves.
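+	// val2Preds counts, for each selector-chain element, how many selector values consume it; an element becomes
+	// ready for rewriting only when that count reaches zero.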
for _, b := range f.Blocks {
for _, v := range b.Values {
// Accumulate chains of selectors for processing in topological order
switch v.Op {
- case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo:
+ case OpStructSelect, OpArraySelect,
+ OpIData, OpITab,
+ OpStringPtr, OpStringLen,
+ OpSlicePtr, OpSliceLen, OpSliceCap,
+ OpComplexReal, OpComplexImag,
+ OpInt64Hi, OpInt64Lo:
w := v.Args[0]
switch w.Op {
- case OpStructSelect, OpArraySelect, OpInt64Hi, OpInt64Lo, OpSelectN:
+ case OpStructSelect, OpArraySelect, OpSelectN, OpArg:
val2Preds[w] += 1
if debug {
fmt.Printf("v2p[%s] = %d\n", w.LongString(), val2Preds[w])
}
}
fallthrough
+
case OpSelectN:
if _, ok := val2Preds[v]; !ok {
val2Preds[v] = 0
@@ -369,52 +771,171 @@ func expandCalls(f *Func) {
fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
}
}
+
+ case OpArg:
+ if !isAlreadyExpandedAggregateType(v.Type) {
+ continue
+ }
+ if _, ok := val2Preds[v]; !ok {
+ val2Preds[v] = 0
+ if debug {
+ fmt.Printf("v2p[%s] = %d\n", v.LongString(), val2Preds[v])
+ }
+ }
+
case OpSelectNAddr:
// Do these directly, there are no chains of selectors.
call := v.Args[0]
which := v.AuxInt
aux := call.Aux.(*AuxCall)
pt := v.Type
- off := f.ConstOffPtrSP(pt, aux.OffsetOfResult(which), sp)
+ off := offsetFrom(sp, aux.OffsetOfResult(which), pt)
v.copyOf(off)
}
}
}
- // Compilation must be deterministic
- var ordered []*Value
- less := func(i, j int) bool { return ordered[i].ID < ordered[j].ID }
+ // Step 3: Compute topological order of selectors,
+ // then process it in reverse to eliminate duplicates,
+ // then forwards to rewrite selectors.
+ //
+ // All chains of selectors end up in same block as the call.
- // Step 3: Rewrite in topological order. All chains of selectors end up in same block as the call.
- for len(val2Preds) > 0 {
- ordered = ordered[:0]
- for v, n := range val2Preds {
- if n == 0 {
- ordered = append(ordered, v)
- }
+	// Compilation must be deterministic, so sort after extracting the initial zero-predecessor values from the map.
+ // Sorting allows dominators-last order within each batch,
+ // so that the backwards scan for duplicates will most often find copies from dominating blocks (it is best-effort).
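+	// Values with no unprocessed consumers seed the first batch; as each batch is processed, a value is appended
+	// to allOrdered (joining the next batch) once its last consuming selector has been handled.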
+ var toProcess []*Value
+ less := func(i, j int) bool {
+ vi, vj := toProcess[i], toProcess[j]
+ bi, bj := vi.Block, vj.Block
+ if bi == bj {
+ return vi.ID < vj.ID
}
- sort.Slice(ordered, less)
- for _, v := range ordered {
- for {
- w := v.Args[0]
- if debug {
- fmt.Printf("About to rewrite %s, args[0]=%s\n", v.LongString(), w.LongString())
- }
- delete(val2Preds, v)
- rewriteSelect(v, v, 0)
- v = w
- n, ok := val2Preds[v]
- if !ok {
- break
- }
- if n != 1 {
- val2Preds[v] = n - 1
- break
- }
- // Loop on new v; val2Preds[v] == 1 will be deleted in that iteration, no need to store zero.
- }
+ return sdom.domorder(bi) > sdom.domorder(bj) // reverse the order to put dominators last.
+ }
+
+ // Accumulate order in allOrdered
+ var allOrdered []*Value
+ for v, n := range val2Preds {
+ if n == 0 {
+ allOrdered = append(allOrdered, v)
}
}
+ last := 0 // allOrdered[0:last] has been top-sorted and processed
+ for len(val2Preds) > 0 {
+ toProcess = allOrdered[last:]
+ last = len(allOrdered)
+ sort.SliceStable(toProcess, less)
+ for _, v := range toProcess {
+ delete(val2Preds, v)
+ if v.Op == OpArg {
+ continue // no Args[0], hence done.
+ }
+ w := v.Args[0]
+ n, ok := val2Preds[w]
+ if !ok {
+ continue
+ }
+ if n == 1 {
+ allOrdered = append(allOrdered, w)
+ delete(val2Preds, w)
+ continue
+ }
+ val2Preds[w] = n - 1
+ }
+ }
+
+ common = make(map[selKey]*Value)
+ // Rewrite duplicate selectors as copies where possible.
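+	// allOrdered is scanned in reverse so that, within each batch, dominating blocks are seen first (they were
+	// sorted last above); later duplicates can then be replaced by copies of a dominating definition.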
+ for i := len(allOrdered) - 1; i >= 0; i-- {
+ v := allOrdered[i]
+ if v.Op == OpArg {
+ continue
+ }
+ w := v.Args[0]
+ if w.Op == OpCopy {
+ for w.Op == OpCopy {
+ w = w.Args[0]
+ }
+ v.SetArg(0, w)
+ }
+ typ := v.Type
+ if typ.IsMemory() {
+ continue // handled elsewhere, not an indexable result
+ }
+ size := typ.Width
+ offset := int64(0)
+ switch v.Op {
+ case OpStructSelect:
+ if w.Type.Etype == types.TSTRUCT {
+ offset = w.Type.FieldOff(int(v.AuxInt))
+			} else { // Immediate interface data artifact -- not expected here, so treat it as a fatal error.
+ f.Fatalf("Expand calls interface data problem, func %s, v=%s, w=%s\n", f.Name, v.LongString(), w.LongString())
+ }
+ case OpArraySelect:
+ offset = size * v.AuxInt
+ case OpSelectN:
+ offset = w.Aux.(*AuxCall).OffsetOfResult(v.AuxInt)
+ case OpInt64Hi:
+ offset = hiOffset
+ case OpInt64Lo:
+ offset = lowOffset
+ case OpStringLen, OpSliceLen, OpIData:
+ offset = ptrSize
+ case OpSliceCap:
+ offset = 2 * ptrSize
+ case OpComplexImag:
+ offset = size
+ }
+ sk := selKey{from: w, size: size, offset: offset, typ: typ}
+ dupe := common[sk]
+ if dupe == nil {
+ common[sk] = v
+ } else if sdom.IsAncestorEq(dupe.Block, v.Block) {
+ v.copyOf(dupe)
+ } else {
+			// Because values are processed in dominator order, the old common[sk] will never dominate after a miss is seen.
+ // Installing the new value might match some future values.
+ common[sk] = v
+ }
+ }
+
+ // Indices of entries in f.Names that need to be deleted.
+ var toDelete []namedVal
+
+ // Rewrite selectors.
+ for i, v := range allOrdered {
+ if debug {
+ b := v.Block
+ fmt.Printf("allOrdered[%d] = b%d, %s, uses=%d\n", i, b.ID, v.LongString(), v.Uses)
+ }
+ if v.Uses == 0 {
+ v.reset(OpInvalid)
+ continue
+ }
+ if v.Op == OpCopy {
+ continue
+ }
+ locs := rewriteSelect(v, v, 0)
+ // Install new names.
+ if v.Type.IsMemory() {
+ continue
+ }
+ // Leaf types may have debug locations
+ if !isAlreadyExpandedAggregateType(v.Type) {
+ for _, l := range locs {
+ f.NamedValues[l] = append(f.NamedValues[l], v)
+ }
+ f.Names = append(f.Names, locs...)
+ continue
+ }
+		// Non-leaf types that had debug locations need to lose them.
+ if ns, ok := namedSelects[v]; ok {
+ toDelete = append(toDelete, ns...)
+ }
+ }
+
+ deleteNamedVals(f, toDelete)
// Step 4: rewrite the calls themselves, correcting the type
for _, b := range f.Blocks {
diff --git a/src/cmd/compile/internal/ssa/export_test.go b/src/cmd/compile/internal/ssa/export_test.go
index 51665c60e27..b4c3e5cfdfb 100644
--- a/src/cmd/compile/internal/ssa/export_test.go
+++ b/src/cmd/compile/internal/ssa/export_test.go
@@ -125,6 +125,10 @@ func (d DummyFrontend) SplitStruct(s LocalSlot, i int) LocalSlot {
func (d DummyFrontend) SplitArray(s LocalSlot) LocalSlot {
return LocalSlot{N: s.N, Type: s.Type.Elem(), Off: s.Off}
}
+
+func (d DummyFrontend) SplitSlot(parent *LocalSlot, suffix string, offset int64, t *types.Type) LocalSlot {
+ return LocalSlot{N: parent.N, Type: t, Off: offset}
+}
func (DummyFrontend) Line(_ src.XPos) string {
return "unknown.go:0"
}
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 6a0b87cab42..537705c6819 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -310,7 +310,7 @@
(Const32 ...) => (MOVLconst ...)
(Const(32|64)F ...) => (MOVS(S|D)const ...)
(ConstNil) => (MOVLconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
// Lowering calls
(StaticCall ...) => (CALLstatic ...)
@@ -640,31 +640,31 @@
// it compiles to a thunk call).
(MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1] {sym1} (LEAL [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOV(L|W|B|SS|SD|BLSX|WLSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(L|W|B|SS|SD)store [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2)
&& (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOV(L|W|B|SS|SD)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOV(L|W|B|SS|SD)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(L|W|B)storeconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOV(L|W|B)storeconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOV(L|W|B)storeconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
&& valoff1.canAdd32(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) =>
- ((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
+ ((ADD|AND|OR|XOR)Lconstmodify [valoff1.addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
// Merge load/store to op
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
@@ -679,37 +679,37 @@
// fold LEALs together
(LEAL [off1] {sym1} (LEAL [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL [off1+off2] {mergeSymTyped(sym1,sym2)} x)
+ (LEAL [off1+off2] {mergeSym(sym1,sym2)} x)
// LEAL into LEAL1
(LEAL1 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAL1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAL1 into LEAL
(LEAL [off1] {sym1} (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL1 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAL into LEAL[248]
(LEAL2 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAL2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL4 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAL4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL8 [off1] {sym1} (LEAL [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAL8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAL[248] into LEAL
(LEAL [off1] {sym1} (LEAL2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAL [off1] {sym1} (LEAL8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAL8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAL[1248] into LEAL[1248]. Only some such merges are possible.
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL2 [off1+off2] {mergeSymTyped(sym1, sym2)} x y)
+ (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} x y)
(LEAL1 [off1] {sym1} x (LEAL1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAL2 [off1+off2] {mergeSymTyped(sym1, sym2)} y x)
+ (LEAL2 [off1+off2] {mergeSym(sym1, sym2)} y x)
(LEAL2 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+2*int64(off2)) =>
(LEAL4 [off1+2*off2] {sym} x y)
(LEAL4 [off1] {sym} x (LEAL1 [off2] {nil} y y)) && is32Bit(int64(off1)+4*int64(off2)) =>
@@ -993,49 +993,49 @@
&& x.Uses == 1
&& a.Off() + 1 == c.Off()
&& clobber(x)
- => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), int32(a.Off()))] {s} p mem)
+ => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), a.Off32())] {s} p mem)
(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
&& x.Uses == 1
&& a.Off() + 1 == c.Off()
&& clobber(x)
- => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), int32(a.Off()))] {s} p mem)
+ => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), a.Off32())] {s} p mem)
(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& a.Off() == c.Off()
&& sequentialAddresses(p0, p1, 1)
&& clobber(x)
- => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), int32(a.Off()))] {s} p0 mem)
+ => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), a.Off32())] {s} p0 mem)
(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
&& x.Uses == 1
&& a.Off() == c.Off()
&& sequentialAddresses(p0, p1, 1)
&& clobber(x)
- => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), int32(a.Off()))] {s} p0 mem)
+ => (MOVWstoreconst [makeValAndOff32(int32(a.Val()&0xff | c.Val()<<8), a.Off32())] {s} p0 mem)
(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
&& x.Uses == 1
&& a.Off() + 2 == c.Off()
&& clobber(x)
- => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), int32(a.Off()))] {s} p mem)
+ => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), a.Off32())] {s} p mem)
(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
&& x.Uses == 1
&& ValAndOff(a).Off() + 2 == ValAndOff(c).Off()
&& clobber(x)
- => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), int32(a.Off()))] {s} p mem)
+ => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), a.Off32())] {s} p mem)
(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
&& x.Uses == 1
&& a.Off() == c.Off()
&& sequentialAddresses(p0, p1, 2)
&& clobber(x)
- => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), int32(a.Off()))] {s} p0 mem)
+ => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), a.Off32())] {s} p0 mem)
(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
&& x.Uses == 1
&& a.Off() == c.Off()
&& sequentialAddresses(p0, p1, 2)
&& clobber(x)
- => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), int32(a.Off()))] {s} p0 mem)
+ => (MOVLstoreconst [makeValAndOff32(int32(a.Val()&0xffff | c.Val()<<16), a.Off32())] {s} p0 mem)
// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} p (SHR(W|L)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 8a253035e02..a866a967b92 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -401,7 +401,7 @@
(Const32F ...) => (MOVSSconst ...)
(Const64F ...) => (MOVSDconst ...)
(ConstNil ) => (MOVQconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
// Lowering calls
(StaticCall ...) => (CALLstatic ...)
@@ -530,8 +530,10 @@
(AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)
// Atomic memory updates.
-(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
-(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
+(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem)
+(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicOr32 ptr val mem) => (ORLlock ptr val mem)
// Write barrier.
(WB ...) => (LoweredWB ...)
@@ -581,7 +583,7 @@
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
=> ((ULT|UGE) (BTQconst [int8(log32(c))] x))
((NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
- => ((ULT|UGE) (BTQconst [int8(log2(c))] x))
+ => ((ULT|UGE) (BTQconst [int8(log64(c))] x))
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
@@ -589,7 +591,7 @@
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
=> (SET(B|AE) (BTQconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQ (MOVQconst [c]) x)) && isUint64PowerOfTwo(c)
- => (SET(B|AE) (BTQconst [int8(log2(c))] x))
+ => (SET(B|AE) (BTQconst [int8(log64(c))] x))
// SET..store variant
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLL (MOVLconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
@@ -600,7 +602,7 @@
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c)
- => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log2(c))] x) mem)
+ => (SET(B|AE)store [off] {sym} ptr (BTQconst [int8(log64(c))] x) mem)
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
@@ -629,7 +631,7 @@
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128
- => (BT(S|C)Qconst [int8(log2(c))] x)
+ => (BT(S|C)Qconst [int8(log64(c))] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(c)) && uint64(c) >= 128
=> (BT(S|C)Lconst [int8(log32(c))] x)
@@ -640,7 +642,7 @@
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128
- => (BTRQconst [int8(log2(^c))] x)
+ => (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x)
@@ -957,7 +959,7 @@
(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 x x))
(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 x x) (LEA(Q|L)8 x x))
-(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const [int8(log2(int64(c)+1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo64(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const [int8(log64(int64(c)+1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const [int8(log32(c-1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const [int8(log32(c-2))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const [int8(log32(c-4))] x) x)
@@ -1135,80 +1137,80 @@
// what variables are being read/written by the ops.
(MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOV(Q|L|W|B|SS|SD|O|BQSX|WQSX|LQSX)load [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOV(Q|L|W|B|SS|SD|O)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOV(Q|L|W|B|SS|SD|O)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOV(Q|L|W|B)storeconst [sc] {sym1} (LEAQ [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd32(off) =>
- (MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOV(Q|L|W|B)storeconst [ValAndOff(sc).addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
(SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (SET(L|G|B|A|LE|GE|BE|AE|EQ|NE)store [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Qload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|AND|OR|XOR)Qload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|AND|OR|XOR)Lload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
(CMP(Q|L|W|B)load [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (CMP(Q|L|W|B)load [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (CMP(Q|L|W|B)load [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(CMP(Q|L|W|B)constload [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
- (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
+ (CMP(Q|L|W|B)constload [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSymTyped(sym1,sym2)} val base mem)
+ ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
- ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSymTyped(sym1,sym2)} base mem)
+ ((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ [off1+off2] {mergeSymTyped(sym1,sym2)} x)
+ (LEAQ [off1+off2] {mergeSym(sym1,sym2)} x)
// LEAQ into LEAQ1
(LEAQ1 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAQ1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAQ1 into LEAQ
(LEAQ [off1] {sym1} (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ1 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ1 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAQ into LEAQ[248]
(LEAQ2 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAQ2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ4 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAQ4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ8 [off1] {sym1} (LEAQ [off2] {sym2} x) y) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && x.Op != OpSB =>
- (LEAQ8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAQ[248] into LEAQ
(LEAQ [off1] {sym1} (LEAQ2 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ2 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ2 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ4 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ4 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ4 [off1+off2] {mergeSym(sym1,sym2)} x y)
(LEAQ [off1] {sym1} (LEAQ8 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ8 [off1+off2] {mergeSymTyped(sym1,sym2)} x y)
+ (LEAQ8 [off1+off2] {mergeSym(sym1,sym2)} x y)
// LEAQ[1248] into LEAQ[1248]. Only some such merges are possible.
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ2 [off1+off2] {mergeSymTyped(sym1, sym2)} x y)
+ (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} x y)
(LEAQ1 [off1] {sym1} x (LEAQ1 [off2] {sym2} x y)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (LEAQ2 [off1+off2] {mergeSymTyped(sym1, sym2)} y x)
+ (LEAQ2 [off1+off2] {mergeSym(sym1, sym2)} y x)
(LEAQ2 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+2*int64(off2)) && sym2 == nil =>
(LEAQ4 [off1+2*off2] {sym1} x y)
(LEAQ4 [off1] {sym1} x (LEAQ1 [off2] {sym2} y y)) && is32Bit(int64(off1)+4*int64(off2)) && sym2 == nil =>
@@ -1998,31 +2000,31 @@
=> (MOVQstore [i-4] {s} p (MOVQload [j-4] {s2} p2 mem) mem)
(MOVQload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVQload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVQload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVLload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVLload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVLload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (LEAL [off2] {sym2} base) mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVQstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVQstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVQstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVLstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVLstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVLstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBstore [off1] {sym1} (LEAL [off2] {sym2} base) val mem) && canMergeSym(sym1, sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVQstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
- (MOVQstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOVQstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVLstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
- (MOVLstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOVLstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVWstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
- (MOVWstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOVWstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVBstoreconst [sc] {sym1} (LEAL [off] {sym2} ptr) mem) && canMergeSym(sym1, sym2) && sc.canAdd32(off) =>
- (MOVBstoreconst [sc.addOffset32(off)] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOVBstoreconst [sc.addOffset32(off)] {mergeSym(sym1, sym2)} ptr mem)
(MOVQload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVQload [off1+off2] {sym} ptr mem)
(MOVLload [off1] {sym} (ADDLconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOVLload [off1+off2] {sym} ptr mem)
@@ -2058,17 +2060,17 @@
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
(MOV(Q|L|B)atomicload [off1+off2] {sym} ptr mem)
(MOV(Q|L|B)atomicload [off1] {sym1} (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOV(Q|L|B)atomicload [off1+off2] {mergeSymTyped(sym1, sym2)} ptr mem)
+ (MOV(Q|L|B)atomicload [off1+off2] {mergeSym(sym1, sym2)} ptr mem)
// Merge ADDQconst and LEAQ into atomic stores.
(XCHGQ [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
(XCHGQ [off1+off2] {sym} val ptr mem)
(XCHGQ [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB =>
- (XCHGQ [off1+off2] {mergeSymTyped(sym1,sym2)} val ptr mem)
+ (XCHGQ [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
(XCHGL [off1] {sym} val (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
(XCHGL [off1+off2] {sym} val ptr mem)
(XCHGL [off1] {sym1} val (LEAQ [off2] {sym2} ptr) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) && ptr.Op != OpSB =>
- (XCHGL [off1+off2] {mergeSymTyped(sym1,sym2)} val ptr mem)
+ (XCHGL [off1+off2] {mergeSym(sym1,sym2)} val ptr mem)
// Merge ADDQconst into atomic adds.
// TODO: merging LEAQ doesn't work, assembler doesn't like the resulting instructions.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 2df5016d59c..de5372670b3 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -902,7 +902,9 @@ func init() {
// Atomic memory updates.
{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
+ {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
{name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
+ {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
}
var AMD64blocks = []blockData{
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index cfedde5d5e8..946acd4ccca 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -169,10 +169,10 @@
(Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 => (SRAconst (SLLconst x [24]) [31])
// constants
-(Const(8|16|32) ...) -> (MOVWconst ...)
-(Const(32F|64F) ...) -> (MOV(F|D)const ...)
+(Const(8|16|32) [val]) => (MOVWconst [int32(val)])
+(Const(32|64)F [val]) => (MOV(F|D)const [float64(val)])
(ConstNil) => (MOVWconst [0])
-(ConstBool ...) -> (MOVWconst ...)
+(ConstBool [b]) => (MOVWconst [b2i32(b)])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
@@ -243,10 +243,10 @@
(Leq16U x y) => (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Leq32U x y) => (LessEqualU (CMP x y))
-(OffPtr [off] ptr:(SP)) -> (MOVWaddr [off] ptr)
-(OffPtr [off] ptr) -> (ADDconst [off] ptr)
+(OffPtr [off] ptr:(SP)) => (MOVWaddr [int32(off)] ptr)
+(OffPtr [off] ptr) => (ADDconst [int32(off)] ptr)
-(Addr ...) -> (MOVWaddr ...)
+(Addr {sym} base) => (MOVWaddr {sym} base)
(LocalAddr {sym} base _) => (MOVWaddr {sym} base)
// loads
@@ -433,30 +433,30 @@
(MOVDstore [off1] {sym} (SUBconst [off2] ptr) val mem) => (MOVDstore [off1-off2] {sym} ptr val mem)
(MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVBUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVHUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVFload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVFload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVHstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVWstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVFstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVFstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBreg x)
@@ -1470,6 +1470,6 @@
(GE (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (GE (TEQshiftRLreg x y z) yes no)
(GE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (GE (TEQshiftRAreg x y z) yes no)
-(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read8(sym, off))])
-(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
+(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read8(sym, int64(off)))])
+(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c4a35326322..7e014fe0a8d 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -548,8 +548,10 @@
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
// Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
+(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
+(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
@@ -859,88 +861,88 @@
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVBUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVHUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWUload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVWUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVSload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (FMOVSload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVDload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (FMOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVHstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVWstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(STP [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val1 val2 mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (STP [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val1 val2 mem)
+ (STP [off1+off2] {mergeSym(sym1,sym2)} ptr val1 val2 mem)
(FMOVSstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (FMOVSstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) val mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (FMOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVBstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVHstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVWstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVDstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVQstorezero [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2))
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) =>
- (MOVQstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVQstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// store zero
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
@@ -1171,145 +1173,147 @@
(MUL x (MOVDconst [-1])) => (NEG x)
(MUL _ (MOVDconst [0])) => (MOVDconst [0])
(MUL x (MOVDconst [1])) => x
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (ADDshiftLL (NEG x) x [log2(c+1)])
-(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))
-(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))
-(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))
-(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log64(c-1)])
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG x) x [log64(c+1)])
+(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log64(c/3)] (ADDshiftLL x x [1]))
+(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log64(c/5)] (ADDshiftLL x x [2]))
+(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3]))
+(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log64(c/9)] (ADDshiftLL x x [3]))
(MULW x (MOVDconst [c])) && int32(c)==-1 => (NEG x)
(MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MULW x (MOVDconst [c])) && int32(c)==1 => x
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG x) x [log2(c+1)])
-(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (ADDshiftLL x x [1]))
-(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SLLconst [log2(c/5)] (ADDshiftLL x x [2]))
-(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (ADDshiftLL (NEG x) x [3]))
-(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SLLconst [log2(c/9)] (ADDshiftLL x x [3]))
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log64(c)] x)
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log64(c-1)])
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG x) x [log64(c+1)])
+(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log64(c/3)] (ADDshiftLL x x [1]))
+(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SLLconst [log64(c/5)] (ADDshiftLL x x [2]))
+(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log64(c/7)] (ADDshiftLL (NEG x) x [3]))
+(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SLLconst [log64(c/9)] (ADDshiftLL x x [3]))
// mneg by constant
(MNEG x (MOVDconst [-1])) => x
(MNEG _ (MOVDconst [0])) => (MOVDconst [0])
(MNEG x (MOVDconst [1])) => (NEG x)
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst [log2(c)] x))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (NEG (ADDshiftLL x x [log2(c-1)]))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (NEG (ADDshiftLL (NEG x) x [log2(c+1)]))
-(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst [log2(c/3)] (SUBshiftLL x x [2]))
-(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2])))
-(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst [log2(c/7)] (SUBshiftLL x x [3]))
-(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3])))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst [log64(c)] x))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL x x [log64(c-1)]))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL (NEG x) x [log64(c+1)]))
+(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log64(c/3)] (SUBshiftLL x x [2]))
+(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst [log64(c/5)] (ADDshiftLL x x [2])))
+(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log64(c/7)] (SUBshiftLL x x [3]))
+(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst [log64(c/9)] (ADDshiftLL x x [3])))
+
(MNEGW x (MOVDconst [c])) && int32(c)==-1 => x
(MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MNEGW x (MOVDconst [c])) && int32(c)==1 => (NEG x)
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst [log2(c)] x))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL x x [log2(c-1)]))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL (NEG x) x [log2(c+1)]))
-(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (SUBshiftLL x x [2]))
-(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (NEG (SLLconst [log2(c/5)] (ADDshiftLL x x [2])))
-(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (SUBshiftLL x x [3]))
-(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (NEG (SLLconst [log2(c/9)] (ADDshiftLL x x [3])))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst [log64(c)] x))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL x x [log64(c-1)]))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL (NEG x) x [log64(c+1)]))
+(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log64(c/3)] (SUBshiftLL x x [2]))
+(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (NEG (SLLconst [log64(c/5)] (ADDshiftLL x x [2])))
+(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log64(c/7)] (SUBshiftLL x x [3]))
+(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (NEG (SLLconst [log64(c/9)] (ADDshiftLL x x [3])))
+
(MADD a x (MOVDconst [-1])) => (SUB a x)
(MADD a _ (MOVDconst [0])) => a
(MADD a x (MOVDconst [1])) => (ADD a x)
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL x x [log2(c-1)]))
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL x x [log2(c+1)]))
-(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL x x [log64(c-1)]))
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL x x [log64(c+1)]))
+(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MADD a (MOVDconst [-1]) x) => (SUB a x)
(MADD a (MOVDconst [0]) _) => a
(MADD a (MOVDconst [1]) x) => (ADD a x)
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL x x [log2(c-1)]))
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL x x [log2(c+1)]))
-(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL x x [log64(c-1)]))
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL x x [log64(c+1)]))
+(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MADDW a x (MOVDconst [c])) && int32(c)==-1 => (SUB a x)
(MADDW a _ (MOVDconst [c])) && int32(c)==0 => a
(MADDW a x (MOVDconst [c])) && int32(c)==1 => (ADD a x)
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL x x [log2(c-1)]))
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL x x [log2(c+1)]))
-(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL x x [log64(c-1)]))
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL x x [log64(c+1)]))
+(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (SUB a x)
(MADDW a (MOVDconst [c]) _) && int32(c)==0 => a
(MADDW a (MOVDconst [c]) x) && int32(c)==1 => (ADD a x)
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL x x [log2(c-1)]))
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL x x [log2(c+1)]))
-(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log64(c)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL x x [log64(c-1)]))
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL x x [log64(c+1)]))
+(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MSUB a x (MOVDconst [-1])) => (ADD a x)
(MSUB a _ (MOVDconst [0])) => a
(MSUB a x (MOVDconst [1])) => (SUB a x)
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL x x [log2(c-1)]))
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL x x [log2(c+1)]))
-(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL x x [log64(c-1)]))
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL x x [log64(c+1)]))
+(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MSUB a (MOVDconst [-1]) x) => (ADD a x)
(MSUB a (MOVDconst [0]) _) => a
(MSUB a (MOVDconst [1]) x) => (SUB a x)
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL x x [log2(c-1)]))
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL x x [log2(c+1)]))
-(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL x x [log64(c-1)]))
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL x x [log64(c+1)]))
+(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (ADD a x)
(MSUBW a _ (MOVDconst [c])) && int32(c)==0 => a
(MSUBW a x (MOVDconst [c])) && int32(c)==1 => (SUB a x)
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL x x [log2(c-1)]))
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL x x [log2(c+1)]))
-(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL x x [log64(c-1)]))
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL x x [log64(c+1)]))
+(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (ADD a x)
(MSUBW a (MOVDconst [c]) _) && int32(c)==0 => a
(MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (SUB a x)
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL x x [log2(c-1)]))
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL x x [log2(c+1)]))
-(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [2]) [log2(c/3)])
-(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [2]) [log2(c/5)])
-(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [3]) [log2(c/7)])
-(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log2(c/9)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log64(c)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL x x [log64(c-1)]))
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL x x [log64(c+1)]))
+(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [2]) [log64(c/3)])
+(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [2]) [log64(c/5)])
+(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL x x [3]) [log64(c/7)])
+(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL x x [3]) [log64(c/9)])
// div by constant
(UDIV x (MOVDconst [1])) => x
-(UDIV x (MOVDconst [c])) && isPowerOfTwo(c) => (SRLconst [log2(c)] x)
+(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log64(c)] x)
(UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
-(UDIVW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (SRLconst [log2(c)] x)
+(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log64(c)] x)
(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
-(UMOD x (MOVDconst [c])) && isPowerOfTwo(c) => (ANDconst [c-1] x)
+(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
(UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
-(UMODW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (ANDconst [c-1] x)
+(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
// generic simplifications
(ADD x (NEG y)) => (SUB x y)
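
Note on the ARM64 rules above: the multiply/divide/modulo strength-reduction guards now use isPowerOfTwo64 and compute shift amounts with log64 instead of the old isPowerOfTwo/log2 helpers. A minimal standalone sketch of what such 64-bit helpers look like (the names come from the rules; the bodies here are illustrative, not copied from the rewrite helpers):

package rewrite

import "math/bits"

// isPowerOfTwo64 reports whether n is a positive power of two.
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

// log64 returns log2(n) for a power-of-two n, i.e. the shift amount
// substituted into SLLconst/SRLconst in the rules above.
func log64(n int64) int64 {
	return int64(bits.Len64(uint64(n))) - 1
}
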
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9ff53f7e4ec..fe9edbf9333 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -656,12 +656,14 @@ func init() {
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns . auxint must be zero.
- // LDAXRB (Rarg0), Rout
+ // LDAXR (Rarg0), Rout
// AND/OR Rarg1, Rout
- // STLXRB Rout, (Rarg0), Rtmp
+ // STLXR Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary,
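
The new LoweredAtomicAnd32/LoweredAtomicOr32 ops give ARM64 a 32-bit wide version of the existing byte-wide atomic and/or, lowered to the LDAXR / AND-or-ORR / STLXR / CBNZ retry loop shown in the comment. As an illustration only (helper names are mine), the memory semantics these ops must provide can be written as a compare-and-swap loop in portable Go; the lowered code achieves the same effect with load/store-exclusive instead of a separate CAS:

package main

import (
	"fmt"
	"sync/atomic"
)

// atomicAnd32 has the effect required of AtomicAnd32: *addr &= mask, atomically.
func atomicAnd32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old&mask) {
			return
		}
	}
}

// atomicOr32 has the effect required of AtomicOr32: *addr |= mask, atomically.
func atomicOr32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old|mask) {
			return
		}
	}
}

func main() {
	v := uint32(0b1111)
	atomicAnd32(&v, 0b1010)
	atomicOr32(&v, 0b0001)
	fmt.Printf("%04b\n", v) // 1011
}
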
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS.rules b/src/cmd/compile/internal/ssa/gen/MIPS.rules
index 96feaf9234b..aff12b4e363 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS.rules
@@ -143,7 +143,7 @@
(Const(32|16|8) [val]) => (MOVWconst [int32(val)])
(Const(32|64)F ...) => (MOV(F|D)const ...)
(ConstNil) => (MOVWconst [0])
-(ConstBool [b]) => (MOVWconst [int32(b2i(b))])
+(ConstBool [b]) => (MOVWconst [b2i32(b)])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
@@ -383,6 +383,9 @@
(ANDconst [3]
(XORconst [3] ptr)))))) mem)
+(AtomicAnd32 ...) => (LoweredAtomicAnd ...)
+(AtomicOr32 ...) => (LoweredAtomicOr ...)
+
// checks
(NilCheck ...) => (LoweredNilCheck ...)
@@ -459,36 +462,36 @@
(MOVWstorezero [off1] {sym} x:(ADDconst [off2] ptr) mem) && (is16Bit(int64(off1+off2)) || x.Uses == 1) => (MOVWstorezero [off1+off2] {sym} ptr mem)
(MOVBload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVBUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHUload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVHUload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVFload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVFload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVFload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVHstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVWstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVFstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVFstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVFstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) =>
- (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVBstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVHstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVWaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) =>
- (MOVWstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) => (MOVBreg x)
@@ -581,13 +584,13 @@
(Select0 (MULTU (MOVWconst [1]) _ )) => (MOVWconst [0])
(Select1 (MULTU (MOVWconst [-1]) x )) => (NEG x)
(Select0 (MULTU (MOVWconst [-1]) x )) => (CMOVZ (ADDconst [-1] x) (MOVWconst [0]) x)
-(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
-(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
+(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
(MUL (MOVWconst [0]) _ ) => (MOVWconst [0])
(MUL (MOVWconst [1]) x ) => x
(MUL (MOVWconst [-1]) x ) => (NEG x)
-(MUL (MOVWconst [c]) x ) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(MUL (MOVWconst [c]) x ) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
// generic simplifications
(ADD x (NEG y)) => (SUB x y)
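
The ConstBool rule in the MIPS changes now calls a dedicated b2i32 helper instead of converting the result of b2i at the use site, keeping the auxint typed as int32. A plausible sketch of such a helper (name from the rule; body illustrative):

package rewrite

// b2i32 converts a bool to an int32 auxint value (true -> 1, false -> 0).
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}
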
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
index e008ec87036..9af0f933333 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
@@ -462,44 +462,44 @@
(MOVVstorezero [off1] {sym} (ADDVconst [off2] ptr) mem) && is32Bit(int64(off1)+off2) => (MOVVstorezero [off1+int32(off2)] {sym} ptr mem)
(MOVBload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVBUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBUload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVHload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVHUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVHUload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVWUload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWUload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWUload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVVload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVVload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVVload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVFload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVFload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVFload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVDload [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDload [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVBstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVBstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVHstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVHstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVHstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVWstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVWstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVVstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVVstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVVstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVFstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVFstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVFstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVDstore [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVDstore [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBstorezero [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVHstorezero [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWstorezero [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
(MOVVstorezero [off1] {sym1} (MOVVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVVstorezero [off1+int32(off2)] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVVstorezero [off1+int32(off2)] {mergeSym(sym1,sym2)} ptr mem)
// store zero
(MOVBstore [off] {sym} ptr (MOVVconst [0]) mem) => (MOVBstorezero [off] {sym} ptr mem)
@@ -580,13 +580,13 @@
(Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x)
(Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0])
(Select1 (MULVU x (MOVVconst [1]))) => x
-(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SLLVconst [log2(c)] x)
+(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SLLVconst [log64(c)] x)
// div by constant
(Select1 (DIVVU x (MOVVconst [1]))) => x
-(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SRLVconst [log2(c)] x)
+(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SRLVconst [log64(c)] x)
(Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod
-(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
+(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod
// generic simplifications
(ADDV x (NEGV y)) => (SUBV x y)
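
The MULVU/DIVVU rules above are the usual power-of-two strength reductions: a multiply by c becomes a left shift by log64(c), an unsigned divide becomes a right shift, and an unsigned modulus becomes a mask with c-1. A small worked example of the identities the rules rely on (assuming c is a positive power of two):

package main

import "fmt"

func main() {
	const c = 8 // isPowerOfTwo64(8) holds; log64(8) == 3
	x := uint64(1234567)
	fmt.Println(x*c == x<<3)    // true: multiply   -> SLLVconst [3]
	fmt.Println(x/c == x>>3)    // true: divide     -> SRLVconst [3]
	fmt.Println(x%c == x&(c-1)) // true: modulus    -> ANDconst [c-1]
}
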
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index 11b1a318fe0..31b186d1679 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -150,6 +150,31 @@
(ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31])
(ROTL x (MOVDconst [c])) => (ROTLconst x [c&63])
+// Combine rotate and mask operations
+(ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(ANDconst [m] (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+(AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+
+// Note, any rotated word bitmask is still a valid word bitmask.
+(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+(ROTLWconst [r] (ANDconst [m] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+
+(ANDconst [m] (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(ANDconst [m] (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+
+// Merge shift right + shift left and clear left (e.g for a table lookup)
+(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
+(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
+// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
+(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
// large constant shifts
(Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 => (MOVDconst [0])
@@ -863,48 +888,48 @@
// is only one use.
(MOVBstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVHstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVWstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
- (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVSstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (FMOVSstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (FMOVSstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (FMOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} ptr val mem)
+ (FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVBZload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVBZload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHZload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVHZload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWZload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (MOVWZload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
- (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVSload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (FMOVSload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (FMOVSload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& is16Bit(int64(off1+off2)) && (ptr.Op != OpSB || p.Uses == 1) =>
- (FMOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (FMOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
// Fold offsets for loads.
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is16Bit(int64(off1)+off2) => (FMOVSload [off1+int32(off2)] {sym} ptr mem)
@@ -954,16 +979,16 @@
// Fold symbols into storezero
(MOVDstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 =>
- (MOVDstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} x mem)
+ (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
(MOVWstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) =>
- (MOVWstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} x mem)
+ (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
(MOVHstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) =>
- (MOVHstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} x mem)
+ (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
(MOVBstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) =>
- (MOVBstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} x mem)
+ (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
// atomic intrinsics
(AtomicLoad(8|32|64|Ptr) ptr mem) => (LoweredAtomicLoad(8|32|64|Ptr) [1] ptr mem)
@@ -980,8 +1005,10 @@
(AtomicCompareAndSwap(32|64) ptr old new_ mem) => (LoweredAtomicCas(32|64) [1] ptr old new_ mem)
(AtomicCompareAndSwapRel32 ptr old new_ mem) => (LoweredAtomicCas32 [0] ptr old new_ mem)
-(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
-(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
(Slicemask x) => (SRADconst (NEG x) [63])
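
The new PPC64 rules fold shift-plus-mask and rotate-plus-mask pairs into a single rotate-and-mask operation, the pattern that shows up in byte-table lookups such as b[(x>>14)&0xFF]. A small Go illustration of the source-level pattern and of the primitive a single rotate-and-mask instruction computes (function names are mine; the aux value produced by encodePPC64RotateMask is not modeled here):

package main

import (
	"fmt"
	"math/bits"
)

// lookup is the kind of shift+mask indexing the new folds target.
func lookup(table *[256]byte, x uint32) byte {
	return table[(x>>14)&0xFF]
}

// rotateAndMask shows the primitive behind RLWINM/RLWNM: rotate the low
// 32 bits left by r, then AND with a mask.
func rotateAndMask(x uint32, r int, mask uint32) uint32 {
	return bits.RotateLeft32(x, r) & mask
}

func main() {
	x := uint32(0xdeadbeef)
	// (x>>14)&0xFF equals a left rotate by 32-14=18 followed by the same mask,
	// because the rotated-in low bits are cleared by the mask.
	fmt.Println((x>>14)&0xFF == rotateAndMask(x, 18, 0xFF)) // true
}
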
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 58856605974..f7198b90c32 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -137,6 +137,7 @@ func init() {
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
+ gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
@@ -227,6 +228,10 @@ func init() {
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
+ {name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
+ {name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
+ {name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
+
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
@@ -602,25 +607,22 @@ func init() {
{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
// atomic add32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// ADD Rarg1, Rout
// STDCCC Rout, (Rarg0)
// BNE -3(PC)
- // ISYNC
// return new sum
-
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
// atomic exchange32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// STDCCC Rarg1, (Rarg0)
// BNE -2(PC)
// ISYNC
// return old val
-
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
@@ -643,15 +645,16 @@ func init() {
{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
- // atomic 8 and/or.
+ // atomic 8/32 and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
- // LBAR (Rarg0), Rtmp
+ // LBAR/LWAT (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
- // STBCCC Rtmp, (Rarg0), Rtmp
+ // STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
// BNE Rtmp, -3(PC)
-
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
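
On the new RLWMI op declared above: it is marked resultInArg0 and uses the new gp21a0 register class because the instruction overwrites its first operand. An illustrative sketch of what an rlwimi-style insert computes (helper name is mine):

package rewrite

import "math/bits"

// rlwimiLike rotates src left by r, then inserts the bits selected by mask
// into dst, leaving dst's remaining bits unchanged. Since the result
// overwrites the first operand, the SSA op must tie its output to arg0.
func rlwimiLike(dst, src uint32, r int, mask uint32) uint32 {
	return (bits.RotateLeft32(src, r) & mask) | (dst &^ mask)
}
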
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 9437c8e9d4b..4380a5efef7 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -3,17 +3,12 @@
// license that can be found in the LICENSE file.
// Optimizations TODO:
-// * Somehow track when values are already zero/signed-extended, avoid re-extending.
// * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
-// * Find a more efficient way to do zero/sign extension than left+right shift.
-// There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...),
-// but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway.
// * Use the zero register instead of moving 0 into a register.
// * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
// * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
// * Arrange for non-trivial Zero and Move lowerings to use aligned loads and stores.
// * Eliminate zero immediate shifts, adds, etc.
-// * Use a Duff's device for some moves and zeros.
// * Avoid using Neq32 for writeBarrier.enabled checks.
// Lowering arithmetic
@@ -66,8 +61,8 @@
(Mod32u ...) => (REMUW ...)
(Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
+(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
(And64 ...) => (AND ...)
(And32 ...) => (AND ...)
@@ -98,25 +93,21 @@
(Sqrt ...) => (FSQRTD ...)
-// Zero and sign extension
-// Shift left until the bits we want are at the top of the register.
-// Then logical/arithmetic shift right for zero/sign extend.
-// We always extend to 64 bits; there's no reason not to,
-// and optimization rules can then collapse some extensions.
+// Sign and zero extension.
-(SignExt8to16 x) => (SRAI [56] (SLLI [56] x))
-(SignExt8to32 x) => (SRAI [56] (SLLI [56] x))
-(SignExt8to64 x) => (SRAI [56] (SLLI [56] x))
-(SignExt16to32 x) => (SRAI [48] (SLLI [48] x))
-(SignExt16to64 x) => (SRAI [48] (SLLI [48] x))
-(SignExt32to64 x) => (ADDIW [0] x)
+(SignExt8to16 ...) => (MOVBreg ...)
+(SignExt8to32 ...) => (MOVBreg ...)
+(SignExt8to64 ...) => (MOVBreg ...)
+(SignExt16to32 ...) => (MOVHreg ...)
+(SignExt16to64 ...) => (MOVHreg ...)
+(SignExt32to64 ...) => (MOVWreg ...)
-(ZeroExt8to16 x) => (SRLI [56] (SLLI [56] x))
-(ZeroExt8to32 x) => (SRLI [56] (SLLI [56] x))
-(ZeroExt8to64 x) => (SRLI [56] (SLLI [56] x))
-(ZeroExt16to32 x) => (SRLI [48] (SLLI [48] x))
-(ZeroExt16to64 x) => (SRLI [48] (SLLI [48] x))
-(ZeroExt32to64 x) => (SRLI [32] (SLLI [32] x))
+(ZeroExt8to16 ...) => (MOVBUreg ...)
+(ZeroExt8to32 ...) => (MOVBUreg ...)
+(ZeroExt8to64 ...) => (MOVBUreg ...)
+(ZeroExt16to32 ...) => (MOVHUreg ...)
+(ZeroExt16to64 ...) => (MOVHUreg ...)
+(ZeroExt32to64 ...) => (MOVWUreg ...)
(Cvt32to32F ...) => (FCVTSW ...)
(Cvt32to64F ...) => (FCVTDW ...)
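
The hunk above replaces the shift-pair lowering of sign/zero extension with dedicated MOVBreg/MOVBUreg-style pseudo-ops, which presumably makes redundant extensions easier to recognize and remove (the old TODO about tracking already-extended values is dropped earlier in this file). A small illustration of the equivalence, with hypothetical helper names:

package rewrite

// Both functions zero-extend the low byte of x to 64 bits. The rules
// previously expanded ZeroExt8to64 into the explicit shift pair; they now
// emit a single MOVBUreg pseudo-op with the same meaning (and likewise
// MOVBreg and the wider variants for the other extensions).
func zeroExt8Shifts(x uint64) uint64 { return (x << 56) >> 56 }
func zeroExt8Byte(x uint64) uint64   { return uint64(uint8(x)) }
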
@@ -261,16 +252,16 @@
(EqPtr x y) => (SEQZ (SUB x y))
(Eq64 x y) => (SEQZ (SUB x y))
(Eq32 x y) => (SEQZ (SUBW x y))
-(Eq16 x y) => (SEQZ (ZeroExt16to64 (SUB x y)))
-(Eq8 x y) => (SEQZ (ZeroExt8to64 (SUB x y)))
+(Eq16 x y) => (SEQZ (SUB (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Eq8 x y) => (SEQZ (SUB (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Eq64F ...) => (FEQD ...)
(Eq32F ...) => (FEQS ...)
(NeqPtr x y) => (SNEZ (SUB x y))
(Neq64 x y) => (SNEZ (SUB x y))
(Neq32 x y) => (SNEZ (SUBW x y))
-(Neq16 x y) => (SNEZ (ZeroExt16to64 (SUB x y)))
-(Neq8 x y) => (SNEZ (ZeroExt8to64 (SUB x y)))
+(Neq16 x y) => (SNEZ (SUB (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Neq8 x y) => (SNEZ (SUB (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Neq64F ...) => (FNED ...)
(Neq32F ...) => (FNES ...)
@@ -291,42 +282,42 @@
(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
// We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
// knows what variables are being read/written by the ops.
(MOVBUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVBUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVBUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVBload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVBload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVHUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVHUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVHload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVHload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVHload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWUload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVWUload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVWUload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVWload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVWload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVWload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVDload [off1] {sym1} (MOVaddr [off2] {sym2} base) mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVDload [off1+off2] {mergeSymTyped(sym1,sym2)} base mem)
+ (MOVDload [off1+off2] {mergeSym(sym1,sym2)} base mem)
(MOVBstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVBstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVBstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVHstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVHstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVHstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVWstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVWstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVWstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVDstore [off1] {sym1} (MOVaddr [off2] {sym2} base) val mem) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
- (MOVDstore [off1+off2] {mergeSymTyped(sym1,sym2)} base val mem)
+ (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
(MOVBstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVBstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVBstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVHstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVHstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVHstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVWstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVWstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDstorezero [off1] {sym1} (MOVaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2) && is32Bit(int64(off1)+int64(off2)) =>
- (MOVDstorezero [off1+off2] {mergeSymTyped(sym1,sym2)} ptr mem)
+ (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVBUload [off1] {sym} (ADDI [off2] base) mem) && is32Bit(int64(off1)+off2) =>
(MOVBUload [off1+int32(off2)] {sym} base mem)
@@ -360,13 +351,66 @@
// with OffPtr -> ADDI.
(ADDI [c] (MOVaddr [d] {s} x)) && is32Bit(c+int64(d)) => (MOVaddr [int32(c)+d] {s} x)
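+// e.g. (ADDI [8] (MOVaddr [4] {s} x)) => (MOVaddr [12] {s} x)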
-// Zeroing
-// TODO: more optimized zeroing, including attempting to use aligned accesses.
-(Zero [0] _ mem) => mem
-(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst) mem)
-(Zero [2] ptr mem) => (MOVHstore ptr (MOVHconst) mem)
-(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
-(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
+// Small zeroing
+(Zero [0] _ mem) => mem
+(Zero [1] ptr mem) => (MOVBstore ptr (MOVBconst [0]) mem)
+(Zero [2] {t} ptr mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore ptr (MOVHconst [0]) mem)
+(Zero [2] ptr mem) =>
+ (MOVBstore [1] ptr (MOVBconst [0])
+ (MOVBstore ptr (MOVBconst [0]) mem))
+(Zero [4] {t} ptr mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore ptr (MOVWconst [0]) mem)
+(Zero [4] {t} ptr mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [2] ptr (MOVHconst [0])
+ (MOVHstore ptr (MOVHconst [0]) mem))
+(Zero [4] ptr mem) =>
+ (MOVBstore [3] ptr (MOVBconst [0])
+ (MOVBstore [2] ptr (MOVBconst [0])
+ (MOVBstore [1] ptr (MOVBconst [0])
+ (MOVBstore ptr (MOVBconst [0]) mem))))
+(Zero [8] {t} ptr mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore ptr (MOVDconst [0]) mem)
+(Zero [8] {t} ptr mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore [4] ptr (MOVWconst [0])
+ (MOVWstore ptr (MOVWconst [0]) mem))
+(Zero [8] {t} ptr mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [6] ptr (MOVHconst [0])
+ (MOVHstore [4] ptr (MOVHconst [0])
+ (MOVHstore [2] ptr (MOVHconst [0])
+ (MOVHstore ptr (MOVHconst [0]) mem))))
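+// The store width is chosen from the type's alignment; e.g. a Zero [8] with
+// only 2-byte alignment becomes four MOVHstore ops.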
+
+(Zero [3] ptr mem) =>
+ (MOVBstore [2] ptr (MOVBconst [0])
+ (MOVBstore [1] ptr (MOVBconst [0])
+ (MOVBstore ptr (MOVBconst [0]) mem)))
+(Zero [6] {t} ptr mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [4] ptr (MOVHconst [0])
+ (MOVHstore [2] ptr (MOVHconst [0])
+ (MOVHstore ptr (MOVHconst [0]) mem)))
+(Zero [12] {t} ptr mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore [8] ptr (MOVWconst [0])
+ (MOVWstore [4] ptr (MOVWconst [0])
+ (MOVWstore ptr (MOVWconst [0]) mem)))
+(Zero [16] {t} ptr mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [8] ptr (MOVDconst [0])
+ (MOVDstore ptr (MOVDconst [0]) mem))
+(Zero [24] {t} ptr mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [16] ptr (MOVDconst [0])
+ (MOVDstore [8] ptr (MOVDconst [0])
+ (MOVDstore ptr (MOVDconst [0]) mem)))
+(Zero [32] {t} ptr mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [24] ptr (MOVDconst [0])
+ (MOVDstore [16] ptr (MOVDconst [0])
+ (MOVDstore [8] ptr (MOVDconst [0])
+ (MOVDstore ptr (MOVDconst [0]) mem))))
+
+// Medium 8-aligned zeroing uses a Duff's device
+// 8 and 128 are magic constants, see runtime/mkduff.go
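+// Each iteration of duffzero stores 8 bytes and is 8 bytes of instructions,
+// so DUFFZERO [8 * (128 - s/8)] jumps past the iterations that are not needed;
+// e.g. s == 64 gives DUFFZERO [960].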
+(Zero [s] {t} ptr mem)
+ && s%8 == 0 && s <= 8*128
+ && t.Alignment()%8 == 0 && !config.noDuffDevice =>
+ (DUFFZERO [8 * (128 - s/8)] ptr mem)
// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
@@ -378,7 +422,7 @@
(Convert ...) => (MOVconvert ...)
// Checks
-(IsNonNil p) => (NeqPtr (MOVDconst) p)
+(IsNonNil p) => (NeqPtr (MOVDconst [0]) p)
(IsInBounds ...) => (Less64U ...)
(IsSliceInBounds ...) => (Leq64U ...)
@@ -395,13 +439,66 @@
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
-// Moves
-// TODO: more optimized moves, including attempting to use aligned accesses.
-(Move [0] _ _ mem) => mem
+// Small moves
+(Move [0] _ _ mem) => mem
(Move [1] dst src mem) => (MOVBstore dst (MOVBload src mem) mem)
-(Move [2] dst src mem) => (MOVHstore dst (MOVHload src mem) mem)
-(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
-(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
+(Move [2] {t} dst src mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore dst (MOVHload src mem) mem)
+(Move [2] dst src mem) =>
+ (MOVBstore [1] dst (MOVBload [1] src mem)
+ (MOVBstore dst (MOVBload src mem) mem))
+(Move [4] {t} dst src mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore dst (MOVWload src mem) mem)
+(Move [4] {t} dst src mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [2] dst (MOVHload [2] src mem)
+ (MOVHstore dst (MOVHload src mem) mem))
+(Move [4] dst src mem) =>
+ (MOVBstore [3] dst (MOVBload [3] src mem)
+ (MOVBstore [2] dst (MOVBload [2] src mem)
+ (MOVBstore [1] dst (MOVBload [1] src mem)
+ (MOVBstore dst (MOVBload src mem) mem))))
+(Move [8] {t} dst src mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore dst (MOVDload src mem) mem)
+(Move [8] {t} dst src mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore [4] dst (MOVWload [4] src mem)
+ (MOVWstore dst (MOVWload src mem) mem))
+(Move [8] {t} dst src mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [6] dst (MOVHload [6] src mem)
+ (MOVHstore [4] dst (MOVHload [4] src mem)
+ (MOVHstore [2] dst (MOVHload [2] src mem)
+ (MOVHstore dst (MOVHload src mem) mem))))
+
+(Move [3] dst src mem) =>
+ (MOVBstore [2] dst (MOVBload [2] src mem)
+ (MOVBstore [1] dst (MOVBload [1] src mem)
+ (MOVBstore dst (MOVBload src mem) mem)))
+(Move [6] {t} dst src mem) && t.Alignment()%2 == 0 =>
+ (MOVHstore [4] dst (MOVHload [4] src mem)
+ (MOVHstore [2] dst (MOVHload [2] src mem)
+ (MOVHstore dst (MOVHload src mem) mem)))
+(Move [12] {t} dst src mem) && t.Alignment()%4 == 0 =>
+ (MOVWstore [8] dst (MOVWload [8] src mem)
+ (MOVWstore [4] dst (MOVWload [4] src mem)
+ (MOVWstore dst (MOVWload src mem) mem)))
+(Move [16] {t} dst src mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [8] dst (MOVDload [8] src mem)
+ (MOVDstore dst (MOVDload src mem) mem))
+(Move [24] {t} dst src mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [16] dst (MOVDload [16] src mem)
+ (MOVDstore [8] dst (MOVDload [8] src mem)
+ (MOVDstore dst (MOVDload src mem) mem)))
+(Move [32] {t} dst src mem) && t.Alignment()%8 == 0 =>
+ (MOVDstore [24] dst (MOVDload [24] src mem)
+ (MOVDstore [16] dst (MOVDload [16] src mem)
+ (MOVDstore [8] dst (MOVDload [8] src mem)
+ (MOVDstore dst (MOVDload src mem) mem))))
+
+// Medium 8-aligned move uses a Duff's device
+// 16 and 128 are magic constants, see runtime/mkduff.go
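+// Each iteration of duffcopy moves 8 bytes using 16 bytes of instructions
+// (load, store and two pointer increments), so DUFFCOPY [16 * (128 - s/8)]
+// skips the iterations that are not needed; e.g. s == 64 gives DUFFCOPY [1920].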
+(Move [s] {t} dst src mem)
+ && s%8 == 0 && s <= 8*128 && t.Alignment()%8 == 0
+ && !config.noDuffDevice && logLargeCopy(v, s) =>
+ (DUFFCOPY [16 * (128 - s/8)] dst src mem)
// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
@@ -424,6 +521,8 @@
(OffPtr [off] ptr) && is32Bit(off) => (ADDI [off] ptr)
(OffPtr [off] ptr) => (ADD (MOVDconst [off]) ptr)
+// TODO(jsing): Check if we actually need MOV{B,H,W}const as most platforms
+// use a single MOVDconst op.
(Const8 ...) => (MOVBconst ...)
(Const16 ...) => (MOVHconst ...)
(Const32 ...) => (MOVWconst ...)
@@ -501,6 +600,79 @@
(MOVWstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
+// Avoid sign/zero extension for consts.
+(MOVBreg (MOVBconst [c])) => (MOVDconst [int64(c)])
+(MOVHreg (MOVBconst [c])) => (MOVDconst [int64(c)])
+(MOVHreg (MOVHconst [c])) => (MOVDconst [int64(c)])
+(MOVWreg (MOVBconst [c])) => (MOVDconst [int64(c)])
+(MOVWreg (MOVHconst [c])) => (MOVDconst [int64(c)])
+(MOVWreg (MOVWconst [c])) => (MOVDconst [int64(c)])
+(MOVBUreg (MOVBconst [c])) => (MOVDconst [int64(uint8(c))])
+(MOVHUreg (MOVBconst [c])) => (MOVDconst [int64(uint16(c))])
+(MOVHUreg (MOVHconst [c])) => (MOVDconst [int64(uint16(c))])
+(MOVWUreg (MOVBconst [c])) => (MOVDconst [int64(uint32(c))])
+(MOVWUreg (MOVHconst [c])) => (MOVDconst [int64(uint32(c))])
+(MOVWUreg (MOVWconst [c])) => (MOVDconst [int64(uint32(c))])
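+// e.g. (MOVBUreg (MOVBconst [-1])) => (MOVDconst [255]) since int64(uint8(-1)) == 255,
+// whereas (MOVBreg (MOVBconst [-1])) => (MOVDconst [-1]).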
+
+// Avoid sign/zero extension after properly typed load.
+(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
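+// (The load instructions already sign or zero extend to 64 bits, so only a
+// register move remains.)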
+
+// Fold double extensions.
+(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
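+// (The inner extension already produces a value that the wider outer extension
+// leaves unchanged.)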
+
+// Do not extend before store.
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
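+// (A store only writes the low 8/16/32 bits, which the extension does not change.)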
+
+// Replace extend after load with alternate load where possible.
+(MOVBreg x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload [off] {sym} ptr mem)
+(MOVHreg x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload [off] {sym} ptr mem)
+(MOVWreg x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload [off] {sym} ptr mem)
+(MOVBUreg x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload [off] {sym} ptr mem)
+(MOVHUreg x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload [off] {sym} ptr mem)
+(MOVWUreg x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload [off] {sym} ptr mem)
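+// (These only apply when the extension is the sole use of the load, so the
+// original load can be clobbered and replaced with its sign/zero extending
+// counterpart.)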