cmd/internal/obj/arm64: shorten constant integer loads

Large integer constants can take up to 4 instructions to encode.

We can encode some large constants with a single instruction, namely
those which are bit patterns (repetitions of certain runs of 0s and 1s).

Often the constants we want to encode are *close* to those bit patterns,
but don't exactly match. For those, we can use 2 instructions, one to
load the close-by bit pattern and one to fix up any mismatches.

The constants we use to strength reduce divides often fit this pattern.
For unsigned divides by 1 through 15, this CL applies to the constant
for N=3,5,6,10,12,15.

Triggers 17 times in hello world.

Change-Id: I623abf32961fb3e74d0a163f6822f0647cd94499
Reviewed-on: https://go-review.googlesource.com/c/go/+/717900
Auto-Submit: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Keith Randall 2025-11-04 15:15:16 -08:00 committed by Gopher Robot
parent 5f4b5f1a19
commit 3b3d6b9e5d
4 changed files with 61 additions and 1 deletions

View file

@ -400,6 +400,8 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
MOVD $0xaaaa0000aaaa1111, R1 // MOVD $-6149102338357718767, R1 // 212282d24155b5f24155f5f2
MOVD $0x1111ffff1111aaaa, R1 // MOVD $1230045644216969898, R1 // a1aa8a922122a2f22122e2f2
MOVD $0xaaaaaaaaaaaaaaab, R1 // MOVD $-6148914691236517205, R1 // e1f301b2615595f2
MOVD $0x0ff019940ff00ff0, R1 // MOVD $1148446028692721648, R1 // e19f0cb28132c3f2
MOVD $0, R1 // e1031faa
MOVD $-1, R1 // 01008092
MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2

View file

@ -38,6 +38,7 @@ import (
"fmt"
"log"
"math"
"math/bits"
"slices"
"strings"
)
@ -1976,7 +1977,18 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
return C_MOVCON
} else if zeroCount == 2 || negCount == 2 {
return C_MOVCON2
} else if zeroCount == 1 || negCount == 1 {
}
// See omovlconst for description of this loop.
for i := 0; i < 4; i++ {
mask := uint64(0xffff) << (i * 16)
for period := 2; period <= 32; period *= 2 {
x := uint64(a.Offset)&^mask | bits.RotateLeft64(uint64(a.Offset), max(period, 16))&mask
if isbitcon(x) {
return C_MOVCON2
}
}
}
if zeroCount == 1 || negCount == 1 {
return C_MOVCON3
} else {
return C_VCON
@ -7555,6 +7567,31 @@ func (c *ctxt7) omovlconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int, os []uin
}
}
return 2
}
// Look for a two instruction pair, a bit pattern encodeable
// as a bitcon immediate plus a fixup MOVK instruction.
// Constants like this often occur from strength reduction of divides.
for i = 0; i < 4; i++ {
mask := uint64(0xffff) << (i * 16)
for period := 2; period <= 32; period *= 2 { // TODO: handle period==64 somehow?
// Copy in bits from outside of the masked region
x := uint64(d)&^mask | bits.RotateLeft64(uint64(d), max(period, 16))&mask
if isbitcon(x) {
// ORR $c1, ZR, rt
os[0] = c.opirr(p, AORR)
os[0] |= bitconEncode(x, 64) | uint32(REGZERO&31)<<5 | uint32(rt&31)
// MOVK $c2<<(i*16), rt
os[1] = c.opirr(p, AMOVK)
os[1] |= MOVCONST(d, i, rt)
return 2
}
}
}
// TODO: other fixups, like ADD or SUB?
// TODO: 3-instruction variant, instead of the full MOVD+3*MOVK version below?
switch {
case zeroCount == 1:
// one MOVZ and two MOVKs

View file

@ -38,3 +38,16 @@ func TestMOVK(t *testing.T) {
t.Errorf("Got %x want %x\n", x, want)
}
}
// testCombined has no Go body; its implementation is the assembly routine
// ·testCombined, which returns two 64-bit constants as a and b.
func testCombined() (a uint64, b uint64)
// TestCombined checks that both 64-bit constants returned by the
// assembly routine testCombined have the expected values. Each result
// is reported separately so a failure identifies which constant was
// loaded incorrectly.
func TestCombined(t *testing.T) {
	got1, got2 := testCombined()
	want1 := uint64(0xaaaaaaaaaaaaaaab)
	want2 := uint64(0x0ff019940ff00ff0)
	if got1 != want1 {
		t.Errorf("First result, got %x want %x", got1, want1)
	}
	if got2 != want2 {
		// Label the second value correctly; it previously said "First result".
		t.Errorf("Second result, got %x want %x", got2, want2)
	}
}

View file

@ -37,3 +37,11 @@ TEXT ·testmovk(SB), NOSPLIT, $0-8
MOVK $(40000<<48), R0
MOVD R0, ret+0(FP)
RET
// testCombined() (uint64, uint64)
// Loads two 64-bit immediates with MOVD and returns them as results a and b.
// Frame spec $0-16: no local frame, 16 bytes of arguments/results
// (the two uint64 return slots).
TEXT ·testCombined(SB), NOSPLIT, $0-16
// Materialize the two constants under test in R0 and R1.
MOVD $0xaaaaaaaaaaaaaaab, R0
MOVD $0x0ff019940ff00ff0, R1
// Store them to the named result slots at offsets 0 and 8 from FP.
MOVD R0, a+0(FP)
MOVD R1, b+8(FP)
RET