go/src/cmd/internal/sys/arch.go

238 lines
4.8 KiB
Go
Raw Normal View History

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package sys
import "encoding/binary"
// ArchFamily represents a family of one or more related architectures.
// For example, ppc64 and ppc64le are both members of the PPC64 family.
type ArchFamily byte
const (
NoArch ArchFamily = iota
AMD64
ARM
ARM64
I386
Loong64
MIPS
MIPS64
PPC64
RISCV64
S390X
Wasm
)
// Arch represents an individual architecture.
type Arch struct {
Name string
Family ArchFamily
ByteOrder binary.ByteOrder
// PtrSize is the size in bytes of pointers and the
// predeclared "int", "uint", and "uintptr" types.
PtrSize int
// RegSize is the size in bytes of general purpose registers.
RegSize int
// MinLC is the minimum length of an instruction code.
MinLC int
// Alignment is maximum alignment required by the architecture
// for any (compiler-generated) load or store instruction.
// Loads or stores smaller than Alignment must be naturally aligned.
// Loads or stores larger than Alignment need only be Alignment-aligned.
Alignment int8
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
// CanMergeLoads reports whether the backend optimization passes
// can combine adjacent loads into a single larger, possibly unaligned, load.
// Note that currently the optimizations must be able to handle little endian byte order.
CanMergeLoads bool
}
// InFamily reports whether a is a member of any of the specified
// architecture families.
func (a *Arch) InFamily(xs ...ArchFamily) bool {
for _, x := range xs {
if a.Family == x {
return true
}
}
return false
}
var Arch386 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "386",
Family: I386,
ByteOrder: binary.LittleEndian,
PtrSize: 4,
RegSize: 4,
MinLC: 1,
Alignment: 1,
CanMergeLoads: true,
}
var ArchAMD64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "amd64",
Family: AMD64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 1,
Alignment: 1,
CanMergeLoads: true,
}
var ArchARM = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "arm",
Family: ARM,
ByteOrder: binary.LittleEndian,
PtrSize: 4,
RegSize: 4,
MinLC: 4,
Alignment: 4, // TODO: just for arm5?
CanMergeLoads: false,
}
var ArchARM64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "arm64",
Family: ARM64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 1,
CanMergeLoads: true,
}
var ArchLoong64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "loong64",
Family: Loong64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 8, // Unaligned accesses are not guaranteed to be fast
CanMergeLoads: false,
}
var ArchMIPS = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "mips",
Family: MIPS,
ByteOrder: binary.BigEndian,
PtrSize: 4,
RegSize: 4,
MinLC: 4,
Alignment: 4,
CanMergeLoads: false,
}
var ArchMIPSLE = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "mipsle",
Family: MIPS,
ByteOrder: binary.LittleEndian,
PtrSize: 4,
RegSize: 4,
MinLC: 4,
Alignment: 4,
CanMergeLoads: false,
}
var ArchMIPS64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "mips64",
Family: MIPS64,
ByteOrder: binary.BigEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 8,
CanMergeLoads: false,
}
var ArchMIPS64LE = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "mips64le",
Family: MIPS64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 8,
CanMergeLoads: false,
}
var ArchPPC64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "ppc64",
Family: PPC64,
ByteOrder: binary.BigEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 1,
CanMergeLoads: false,
}
var ArchPPC64LE = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "ppc64le",
Family: PPC64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 1,
CanMergeLoads: true,
}
var ArchRISCV64 = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "riscv64",
Family: RISCV64,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 4,
Alignment: 8, // riscv unaligned loads work, but are really slow (trap + simulated by OS)
CanMergeLoads: false,
}
var ArchS390X = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "s390x",
Family: S390X,
ByteOrder: binary.BigEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 2,
Alignment: 1,
CanMergeLoads: true,
}
var ArchWasm = &Arch{
cmd/compile: make encoding/binary loads/stores cheaper to inline The encoding/binary little- and big-endian load and store routines are frequently used in performance sensitive code. They look fairly complex to the inliner. Though the routines themselves can be inlined, code using them typically cannot be. Yet they typically compile down to an instruction or two on architectures that support merging such loads. This change teaches the inliner to treat calls to these methods as cheap, so that code using them will be more inlineable. It'd be better to teach the inliner that this pattern of code is cheap, rather than these particular methods. However, that is difficult to do robustly when working with the IR representation. And the broader project of which that would be a part, namely to model the rest of the compiler in the inliner, is probably a non-starter. By way of contrast, imperfect though it is, this change is an easy, cheap, and useful heuristic. If/when we base inlining decisions on more accurate information obtained later in the compilation process, or on PGO/FGO, we can remove this and other such heuristics. Newly inlineable functions in the standard library: crypto/cipher.gcmInc32 crypto/sha512.appendUint64 crypto/md5.appendUint64 crypto/sha1.appendUint64 crypto/sha256.appendUint64 vendor/golang.org/x/crypto/poly1305.initialize encoding/gob.(*encoderState).encodeUint vendor/golang.org/x/text/unicode/norm.buildRecompMap net/http.(*http2SettingsFrame).Setting net/http.http2parseGoAwayFrame net/http.http2parseWindowUpdateFrame Benchmark impact for encoding/gob (the only package I measured): name old time/op new time/op delta EndToEndPipe-8 2.25µs ± 1% 2.21µs ± 3% -1.79% (p=0.000 n=28+27) EndToEndByteBuffer-8 93.3ns ± 5% 94.2ns ± 5% ~ (p=0.174 n=30+30) EndToEndSliceByteBuffer-8 10.5µs ± 1% 10.6µs ± 1% +0.87% (p=0.000 n=30+30) EncodeComplex128Slice-8 1.81µs ± 0% 1.75µs ± 1% -3.23% (p=0.000 n=28+30) EncodeFloat64Slice-8 900ns ± 1% 847ns ± 0% -5.91% (p=0.000 n=29+28) EncodeInt32Slice-8 1.02µs ± 0% 0.90µs ± 0% -11.82% (p=0.000 n=28+26) EncodeStringSlice-8 1.16µs ± 1% 1.04µs ± 1% -10.20% (p=0.000 n=29+26) EncodeInterfaceSlice-8 28.7µs ± 3% 29.2µs ± 6% ~ (p=0.067 n=29+30) DecodeComplex128Slice-8 7.98µs ± 1% 7.96µs ± 1% -0.27% (p=0.017 n=30+30) DecodeFloat64Slice-8 4.33µs ± 1% 4.34µs ± 1% +0.24% (p=0.022 n=30+29) DecodeInt32Slice-8 4.18µs ± 1% 4.18µs ± 0% ~ (p=0.074 n=30+28) DecodeStringSlice-8 13.2µs ± 1% 13.1µs ± 1% -0.64% (p=0.000 n=28+28) DecodeStringsSlice-8 31.9µs ± 1% 31.8µs ± 1% -0.34% (p=0.001 n=30+30) DecodeBytesSlice-8 8.88µs ± 1% 8.84µs ± 1% -0.48% (p=0.000 n=30+30) DecodeInterfaceSlice-8 64.1µs ± 1% 64.2µs ± 1% ~ (p=0.173 n=30+28) DecodeMap-8 74.3µs ± 0% 74.2µs ± 0% ~ (p=0.131 n=29+30) Fixes #42958 Change-Id: Ie048b8976fb403d8bcc72ac6bde4b33e133e2a47 Reviewed-on: https://go-review.googlesource.com/c/go/+/349931 Trust: Josh Bleecher Snyder <josharian@gmail.com> Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
2021-09-13 15:28:55 -07:00
Name: "wasm",
Family: Wasm,
ByteOrder: binary.LittleEndian,
PtrSize: 8,
RegSize: 8,
MinLC: 1,
Alignment: 1,
CanMergeLoads: false,
}
var Archs = [...]*Arch{
Arch386,
ArchAMD64,
ArchARM,
ArchARM64,
ArchLoong64,
ArchMIPS,
ArchMIPSLE,
ArchMIPS64,
ArchMIPS64LE,
ArchPPC64,
ArchPPC64LE,
ArchRISCV64,
ArchS390X,
ArchWasm,
}