cmd/compile: move arm64 over to new bounds check strategy

For all the static bounds checks in cmd/go, we have:

    6877    just a single instruction (the call itself)
    139     needs an additional reg-reg move
    602     needs an additional constant load
    25      needs some other instruction

that's ~90% implemented using just a single instruction.

Reduces the text size of cmd/go by ~0.8%.
Total binary size is just barely smaller, ~0.2%. (The difference
is the new pcdata table.)

Change-Id: I416e9c196f5d8d0e8f08e191e6df3045e11dccbe
Reviewed-on: https://go-review.googlesource.com/c/go/+/682496
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
This commit is contained in:
Keith Randall 2025-06-18 15:06:55 -07:00
parent 3024785b92
commit 394d0bee8d
7 changed files with 228 additions and 153 deletions

View file

@ -16,6 +16,7 @@ import (
"cmd/compile/internal/types"
"cmd/internal/obj"
"cmd/internal/obj/arm64"
"internal/abi"
)
// loadByType returns the load instruction of the given type.
@ -1122,12 +1123,91 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
// AuxInt encodes how many buffer entries we need.
p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
case ssa.OpARM64LoweredPanicBoundsA, ssa.OpARM64LoweredPanicBoundsB, ssa.OpARM64LoweredPanicBoundsC:
p := s.Prog(obj.ACALL)
case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
// Compute the constant we put in the PCData entry for this call.
code, signed := ssa.BoundsKind(v.AuxInt).Code()
xIsReg := false
yIsReg := false
xVal := 0
yVal := 0
switch v.Op {
case ssa.OpARM64LoweredPanicBoundsRR:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm64.REG_R0)
yIsReg = true
yVal = int(v.Args[1].Reg() - arm64.REG_R0)
case ssa.OpARM64LoweredPanicBoundsRC:
xIsReg = true
xVal = int(v.Args[0].Reg() - arm64.REG_R0)
c := v.Aux.(ssa.PanicBoundsC).C
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
if yVal == xVal {
yVal = 1
}
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(yVal)
}
case ssa.OpARM64LoweredPanicBoundsCR:
	// CR form: x is the constant carried in Aux, y is the single register argument.
	yIsReg = true
	// Plain assignment, not :=. A short variable declaration here would create a
	// case-local yVal and leave the outer one at 0, so the BoundsEncode call after
	// the switch would always describe y as living in R0.
	yVal = int(v.Args[0].Reg() - arm64.REG_R0)
	c := v.Aux.(ssa.PanicBoundsC).C
	if c >= 0 && c <= abi.BoundsMaxConst {
		// The constant is in the directly encodable range; no extra instruction.
		xVal = int(c)
	} else {
		// Move constant to a register
		// Mark x as register-resident so the encoded value matches the MOVD
		// emitted below (mirrors the RC and CC cases).
		xIsReg = true
		// xVal is still 0 here, i.e. R0. Fall back to R1 if y already occupies R0.
		if xVal == yVal {
			xVal = 1
		}
		p := s.Prog(arm64.AMOVD)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = c
		p.To.Type = obj.TYPE_REG
		p.To.Reg = arm64.REG_R0 + int16(xVal)
	}
case ssa.OpARM64LoweredPanicBoundsCC:
c := v.Aux.(ssa.PanicBoundsCC).Cx
if c >= 0 && c <= abi.BoundsMaxConst {
xVal = int(c)
} else {
// Move constant to a register
xIsReg = true
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(xVal)
}
c = v.Aux.(ssa.PanicBoundsCC).Cy
if c >= 0 && c <= abi.BoundsMaxConst {
yVal = int(c)
} else {
// Move constant to a register
yIsReg = true
yVal = 1
p := s.Prog(arm64.AMOVD)
p.From.Type = obj.TYPE_CONST
p.From.Offset = c
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REG_R0 + int16(yVal)
}
}
c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
p := s.Prog(obj.APCDATA)
p.From.SetConst(abi.PCDATA_PanicBounds)
p.To.SetConst(int64(c))
p = s.Prog(obj.ACALL)
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_EXTERN
p.To.Sym = ssagen.BoundsCheckFunc[v.AuxInt]
s.UseArgs(16) // space used in callee args area by assembly stubs
p.To.Sym = ir.Syms.PanicBounds
case ssa.OpARM64LoweredNilCheck:
// Issue a load which will fault if arg is nil.
p := s.Prog(arm64.AMOVB)

View file

@ -601,9 +601,11 @@
// Publication barrier (0xe is ST option)
(PubBarrier mem) => (DMB [0xe] mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 0 => (LoweredPanicBoundsA [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 1 => (LoweredPanicBoundsB [kind] x y mem)
(PanicBounds [kind] x y mem) && boundsABI(kind) == 2 => (LoweredPanicBoundsC [kind] x y mem)
(PanicBounds ...) => (LoweredPanicBoundsRR ...)
(LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem) => (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
(LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem) => (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
(LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
(LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem) => (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
// Optimizations

View file

@ -144,11 +144,8 @@ func init() {
gpspsbg = gpspg | buildReg("SB")
fp = buildReg("F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31")
callerSave = gp | fp | buildReg("g") // runtime.setg (and anything calling it) may clobber g
r0 = buildReg("R0")
r1 = buildReg("R1")
r2 = buildReg("R2")
r3 = buildReg("R3")
rz = buildReg("ZERO")
first16 = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15")
)
// Common regInfo
var (
@ -760,12 +757,15 @@ func init() {
// Returns a pointer to a write barrier buffer in R25.
{name: "LoweredWB", argLength: 1, reg: regInfo{clobbers: (callerSave &^ gpg) | buildReg("R16 R17 R30"), outputs: []regMask{buildReg("R25")}}, clobberFlags: true, aux: "Int64"},
// There are three of these functions so that they can have three different register inputs.
// When we check 0 <= c <= cap (A), then 0 <= b <= c (B), then 0 <= a <= b (C), we want the
// default registers to match so we don't need to copy registers around unnecessarily.
{name: "LoweredPanicBoundsA", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r2, r3}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsB", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r1, r2}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
{name: "LoweredPanicBoundsC", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{r0, r1}}, typ: "Mem", call: true}, // arg0=idx, arg1=len, arg2=mem, returns memory. AuxInt contains report code (see PanicBounds in generic.go).
// LoweredPanicBoundsRR takes x and y, two values that caused a bounds check to fail.
// The RC and CR versions are used when one of the arguments is a constant. CC is used
// when both are constant (normally both 0, as prove derives the fact that a [0] bounds
// failure means the length must have also been 0).
// AuxInt contains a report code (see PanicBounds in genericOps.go).
{name: "LoweredPanicBoundsRR", argLength: 3, aux: "Int64", reg: regInfo{inputs: []regMask{first16, first16}}, typ: "Mem", call: true}, // arg0=x, arg1=y, arg2=mem, returns memory.
{name: "LoweredPanicBoundsRC", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=x, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCR", argLength: 2, aux: "PanicBoundsC", reg: regInfo{inputs: []regMask{first16}}, typ: "Mem", call: true}, // arg0=y, arg1=mem, returns memory.
{name: "LoweredPanicBoundsCC", argLength: 1, aux: "PanicBoundsCC", reg: regInfo{}, typ: "Mem", call: true}, // arg0=mem, returns memory.
// Prefetch instruction
// Do prefetch arg0 address with option aux. arg0=addr, arg1=memory, aux=option.

View file

@ -1757,9 +1757,10 @@ const (
OpARM64LoweredAtomicAnd32Variant
OpARM64LoweredAtomicOr32Variant
OpARM64LoweredWB
OpARM64LoweredPanicBoundsA
OpARM64LoweredPanicBoundsB
OpARM64LoweredPanicBoundsC
OpARM64LoweredPanicBoundsRR
OpARM64LoweredPanicBoundsRC
OpARM64LoweredPanicBoundsCR
OpARM64LoweredPanicBoundsCC
OpARM64PRFM
OpARM64DMB
OpARM64ZERO
@ -23678,41 +23679,46 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredPanicBoundsA",
name: "LoweredPanicBoundsRR",
auxType: auxInt64,
argLen: 3,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 4}, // R2
{1, 8}, // R3
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
{1, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsB",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsRC",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 2}, // R1
{1, 4}, // R2
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsC",
auxType: auxInt64,
argLen: 3,
name: "LoweredPanicBoundsCR",
auxType: auxPanicBoundsC,
argLen: 2,
call: true,
reg: regInfo{
inputs: []inputInfo{
{0, 1}, // R0
{1, 2}, // R1
{0, 65535}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "LoweredPanicBoundsCC",
auxType: auxPanicBoundsCC,
argLen: 1,
call: true,
reg: regInfo{},
},
{
name: "PRFM",
auxType: auxInt64,

View file

@ -180,6 +180,12 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpARM64LessThanNoov(v)
case OpARM64LessThanU:
return rewriteValueARM64_OpARM64LessThanU(v)
case OpARM64LoweredPanicBoundsCR:
return rewriteValueARM64_OpARM64LoweredPanicBoundsCR(v)
case OpARM64LoweredPanicBoundsRC:
return rewriteValueARM64_OpARM64LoweredPanicBoundsRC(v)
case OpARM64LoweredPanicBoundsRR:
return rewriteValueARM64_OpARM64LoweredPanicBoundsRR(v)
case OpARM64MADD:
return rewriteValueARM64_OpARM64MADD(v)
case OpARM64MADDW:
@ -936,7 +942,8 @@ func rewriteValueARM64(v *Value) bool {
v.Op = OpARM64OR
return true
case OpPanicBounds:
return rewriteValueARM64_OpPanicBounds(v)
v.Op = OpARM64LoweredPanicBoundsRR
return true
case OpPopCount16:
return rewriteValueARM64_OpPopCount16(v)
case OpPopCount32:
@ -7042,6 +7049,86 @@ func rewriteValueARM64_OpARM64LessThanU(v *Value) bool {
}
return false
}
// rewriteValueARM64_OpARM64LoweredPanicBoundsCR tries to rewrite a
// LoweredPanicBoundsCR value in place. If its first argument is a MOVDconst,
// v becomes a LoweredPanicBoundsCC whose Aux holds both constants: Cx is the
// constant already stored in v's PanicBoundsC aux and Cy is the MOVDconst's
// AuxInt. It reports whether v was rewritten.
func rewriteValueARM64_OpARM64LoweredPanicBoundsCR(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsCR [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:p.C, Cy:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
// The rule only applies when the remaining non-memory argument is a constant.
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
// kind, p and c were read above, before reset re-initializes v as the new op.
v.reset(OpARM64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: p.C, Cy: c})
v.AddArg(mem)
return true
}
return false
}
// rewriteValueARM64_OpARM64LoweredPanicBoundsRC tries to rewrite a
// LoweredPanicBoundsRC value in place. If its first argument is a MOVDconst,
// v becomes a LoweredPanicBoundsCC whose Aux holds both constants: Cx is the
// MOVDconst's AuxInt and Cy is the constant already stored in v's
// PanicBoundsC aux. It reports whether v was rewritten.
func rewriteValueARM64_OpARM64LoweredPanicBoundsRC(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRC [kind] {p} (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsCC [kind] {PanicBoundsCC{Cx:c, Cy:p.C}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
p := auxToPanicBoundsC(v.Aux)
// The rule only applies when the remaining non-memory argument is a constant.
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
mem := v_1
// kind, p and c were read above, before reset re-initializes v as the new op.
v.reset(OpARM64LoweredPanicBoundsCC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCCToAux(PanicBoundsCC{Cx: c, Cy: p.C})
v.AddArg(mem)
return true
}
return false
}
// rewriteValueARM64_OpARM64LoweredPanicBoundsRR tries to rewrite a
// LoweredPanicBoundsRR value in place by folding a constant argument into Aux.
// Two rules are tried in order:
//  1. the second argument (y) is a MOVDconst: v becomes LoweredPanicBoundsRC,
//     keeping x as the only non-memory argument;
//  2. the first argument (x) is a MOVDconst: v becomes LoweredPanicBoundsCR,
//     keeping y as the only non-memory argument.
//
// When both arguments are constants, rule 1 fires here and leaves an RC value
// whose remaining argument is still a MOVDconst. It reports whether v was rewritten.
func rewriteValueARM64_OpARM64LoweredPanicBoundsRR(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (LoweredPanicBoundsRR [kind] x (MOVDconst [c]) mem)
// result: (LoweredPanicBoundsRC [kind] x {PanicBoundsC{C:c}} mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
if v_1.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_1.AuxInt)
mem := v_2
v.reset(OpARM64LoweredPanicBoundsRC)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(x, mem)
return true
}
// match: (LoweredPanicBoundsRR [kind] (MOVDconst [c]) y mem)
// result: (LoweredPanicBoundsCR [kind] {PanicBoundsC{C:c}} y mem)
for {
kind := auxIntToInt64(v.AuxInt)
if v_0.Op != OpARM64MOVDconst {
break
}
c := auxIntToInt64(v_0.AuxInt)
y := v_1
mem := v_2
v.reset(OpARM64LoweredPanicBoundsCR)
v.AuxInt = int64ToAuxInt(kind)
v.Aux = panicBoundsCToAux(PanicBoundsC{C: c})
v.AddArg2(y, mem)
return true
}
return false
}
func rewriteValueARM64_OpARM64MADD(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
@ -19850,60 +19937,6 @@ func rewriteValueARM64_OpOffPtr(v *Value) bool {
return true
}
}
func rewriteValueARM64_OpPanicBounds(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 0
// result: (LoweredPanicBoundsA [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 0) {
break
}
v.reset(OpARM64LoweredPanicBoundsA)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 1
// result: (LoweredPanicBoundsB [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 1) {
break
}
v.reset(OpARM64LoweredPanicBoundsB)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
// match: (PanicBounds [kind] x y mem)
// cond: boundsABI(kind) == 2
// result: (LoweredPanicBoundsC [kind] x y mem)
for {
kind := auxIntToInt64(v.AuxInt)
x := v_0
y := v_1
mem := v_2
if !(boundsABI(kind) == 2) {
break
}
v.reset(OpARM64LoweredPanicBoundsC)
v.AuxInt = int64ToAuxInt(kind)
v.AddArg3(x, y, mem)
return true
}
return false
}
func rewriteValueARM64_OpPopCount16(v *Value) bool {
v_0 := v.Args[0]
b := v.Block

View file

@ -98,5 +98,7 @@ func (b *IntArgRegBitmap) Set(i int) {
//
//go:nosplit
func (b *IntArgRegBitmap) Get(i int) bool {
return b[i/8]&(uint8(1)<<(i%8)) != 0
// Compute p=&b[i/8], but without a bounds check. We don't have the stack for it.
p := (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(b)) + uintptr(i/8)))
return *p&(uint8(1)<<(i%8)) != 0
}

View file

@ -1574,70 +1574,22 @@ TEXT runtime·debugCallPanicked(SB),NOSPLIT,$16-16
BREAK
RET
// Note: these functions use a special calling convention to save generated code space.
// Arguments are passed in registers, but the space for those arguments are allocated
// in the caller's stack frame. These stubs write the args into that stack space and
// then tail call to the corresponding runtime handler.
// The tail call makes these stubs disappear in backtraces.
//
// Defined as ABIInternal since the compiler generates ABIInternal
// calls to it directly and it does not use the stack-based Go ABI.
TEXT runtime·panicIndex<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicIndex<ABIInternal>(SB)
TEXT runtime·panicIndexU<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicIndexU<ABIInternal>(SB)
TEXT runtime·panicSliceAlen<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSliceAlen<ABIInternal>(SB)
TEXT runtime·panicSliceAlenU<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSliceAlenU<ABIInternal>(SB)
TEXT runtime·panicSliceAcap<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSliceAcap<ABIInternal>(SB)
TEXT runtime·panicSliceAcapU<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSliceAcapU<ABIInternal>(SB)
TEXT runtime·panicSliceB<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicSliceB<ABIInternal>(SB)
TEXT runtime·panicSliceBU<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicSliceBU<ABIInternal>(SB)
TEXT runtime·panicSlice3Alen<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R2, R0
MOVD R3, R1
JMP runtime·goPanicSlice3Alen<ABIInternal>(SB)
TEXT runtime·panicSlice3AlenU<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R2, R0
MOVD R3, R1
JMP runtime·goPanicSlice3AlenU<ABIInternal>(SB)
TEXT runtime·panicSlice3Acap<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R2, R0
MOVD R3, R1
JMP runtime·goPanicSlice3Acap<ABIInternal>(SB)
TEXT runtime·panicSlice3AcapU<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R2, R0
MOVD R3, R1
JMP runtime·goPanicSlice3AcapU<ABIInternal>(SB)
TEXT runtime·panicSlice3B<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSlice3B<ABIInternal>(SB)
TEXT runtime·panicSlice3BU<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R1, R0
MOVD R2, R1
JMP runtime·goPanicSlice3BU<ABIInternal>(SB)
TEXT runtime·panicSlice3C<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicSlice3C<ABIInternal>(SB)
TEXT runtime·panicSlice3CU<ABIInternal>(SB),NOSPLIT,$0-16
JMP runtime·goPanicSlice3CU<ABIInternal>(SB)
TEXT runtime·panicSliceConvert<ABIInternal>(SB),NOSPLIT,$0-16
MOVD R2, R0
MOVD R3, R1
JMP runtime·goPanicSliceConvert<ABIInternal>(SB)
// panicBounds stores R0-R15 into its own frame, then calls panicBounds64 with
// the return PC of its caller in R0 and a pointer to the stored registers in R1.
// NOTE(review): presumably panicBounds64 uses the PC to find the PCDATA entry
// describing which saved register (or constant) holds each failing value;
// confirm against the Go-side implementation.
TEXT runtime·panicBounds<ABIInternal>(SB),NOSPLIT,$144-0
NO_LOCAL_POINTERS
// Save all 16 int registers that could have an index in them.
// They may be pointers, but if they are they are dead.
// Register Rn is stored at offset 24+8*n from RSP.
STP (R0, R1), 24(RSP)
STP (R2, R3), 40(RSP)
STP (R4, R5), 56(RSP)
STP (R6, R7), 72(RSP)
STP (R8, R9), 88(RSP)
STP (R10, R11), 104(RSP)
STP (R12, R13), 120(RSP)
STP (R14, R15), 136(RSP)
// R0 and R1 are overwritten only after their original values were saved above.
MOVD LR, R0 // PC immediately after call to panicBounds
ADD $24, RSP, R1 // pointer to save area
CALL runtime·panicBounds64<ABIInternal>(SB)
RET
TEXT ·getfp<ABIInternal>(SB),NOSPLIT|NOFRAME,$0
MOVD R29, R0