cmd/internal/obj/arm: use single BIC for AND with negative-rotated immediate

When AND $imm, Rn, Rd is given a constant whose bitwise complement
fits in a rotated 8-bit ARM immediate (i.e. classified as C_NCON),
the assembler synthesized the operation as MVN $~imm, R11 followed
by AND R11, Rn, Rd. This silently clobbered REGTMP (R11) for an
operation that has a one-instruction encoding: BIC $~imm, Rn, Rd.

Within Go's internal ABI this synthesis is technically legal: R11
is documented REGTMP, reserved for assembler and linker scratch,
and Go-compiled callers do not assume its value is preserved across
arbitrary instructions. So this is a quality-of-implementation
improvement, not a correctness bug fix for first-party Go code.

The rewrite is unconditional within the C_NCON path: the class is
only assigned when immrot(^imm) succeeds, which is exactly the
condition for BIC to encode the immediate directly. AND.S maps to
BIC.S; both set N from bit 31 and Z from result==0, leave V
unchanged, and the C flag from a rotated immediate is the same in
either form. Constants that fall through to C_LCON (literal pool)
are unaffected.

It also stops surprising hand-written assembly ported in from other
toolchains. Outside Go's ABI, on the standard ARM ABI used by
GAS/GCC/clang/etc., R11 is fp -- an ordinary callee-saved register
that hand-written code routinely keeps live across instructions.
Code ported from those toolchains (e.g. CRYPTOGAMS, OpenSSL) reads
as plain AND but, with the old synthesis, silently expands to two
instructions and trashes R11 as a side effect. After this change an
AND whose immediate's complement is a rotated 8-bit value encodes,
in Plan 9 syntax, as a single ARM instruction, matching
what GAS produces for the same source.

The Go compiler does not currently emit AND with a C_NCON immediate
on ARM, so this only affects hand-written .s files. Building cmd/go
for GOARCH=arm hits exactly three call sites:

  internal/bytealg/compare_arm.s:52  AND $0xfffffffc, R6, R8  -> BIC $3, R6, R8
  internal/bytealg/equal_arm.s:61    AND $0xfffffffc, R1, R6  -> BIC $3, R1, R6
  runtime/asm_arm.s:105              AND $~7, R13             -> BIC $7, R13

Change-Id: I0194961917c8a5b3fb5075f787bf8d1e4020ab40
Reviewed-on: https://go-review.googlesource.com/c/go/+/774862
TryBot-Bypass: Brad Fitzpatrick <bradfitz@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Brad Fitzpatrick 2026-05-06 23:26:42 +00:00
parent 11a3b27b91
commit 358cf41413
2 changed files with 34 additions and 6 deletions

View file

@ -1089,10 +1089,23 @@ jmp_label_3:
RSC $0xffffff4b, R5 // RSC $4294967115, R5 // b4b0e0e30b50e5e0
RSC.S $0xffffffb5, R2, R3 // RSC.S $4294967221, R2, R3 // 4ab0e0e30b30f2e0
RSC.S $0xffffff4a, R5 // RSC.S $4294967114, R5 // b5b0e0e30b50f5e0
AND $0xffffffaa, R2, R3 // AND $4294967210, R2, R3 // 55b0e0e30b3002e0
AND $0xffffff55, R5 // AND $4294967125, R5 // aab0e0e30b5005e0
AND.S $0xffffffab, R2, R3 // AND.S $4294967211, R2, R3 // 54b0e0e30b3012e0
AND.S $0xffffff54, R5 // AND.S $4294967124, R5 // abb0e0e30b5015e0
AND $0xffffffaa, R2, R3 // AND $4294967210, R2, R3 // 5530c2e3
AND $0xffffff55, R5 // AND $4294967125, R5 // aa50c5e3
AND.S $0xffffffab, R2, R3 // AND.S $4294967211, R2, R3 // 5430d2e3
AND.S $0xffffff54, R5 // AND.S $4294967124, R5 // ab50d5e3
// AND with a negative-rotated immediate: emit a single BIC,
// not the two-instruction MVN+AND synthesis (which clobbers R11).
AND $0xfffffffc, R0, R1 // AND $4294967292, R0, R1 // 0310c0e3
AND $0xfffffff0, R0, R1 // AND $4294967280, R0, R1 // 0f10c0e3
AND $0xffffff00, R0, R1 // AND $4294967040, R0, R1 // ff10c0e3
AND $0xfffffffc, R8 // AND $4294967292, R8 // 0380c8e3
AND.S $0xfffffffc, R0, R1 // AND.S $4294967292, R0, R1 // 0310d0e3
// AND with an irregular immediate (neither rot8 nor ~rot8 fit)
// still goes through the literal-pool synthesis: MOVW pool(PC),
// R11; AND R11, Rn, Rd. Encoded form depends on pool placement,
// so just round-trip the disassembly.
AND $0xaaaaaaaa, R0, R1 // AND $2863311530, R0, R1
AND.S $0xaaaaaaaa, R0, R1 // AND.S $2863311530, R0, R1
ORR $0xffffffaa, R2, R3 // ORR $4294967210, R2, R3 // 55b0e0e30b3082e1
ORR $0xffffff55, R5 // ORR $4294967125, R5 // aab0e0e30b5085e1
ORR.S $0xffffffab, R2, R3 // ORR.S $4294967211, R2, R3 // 54b0e0e30b3092e1

View file

@ -141,8 +141,8 @@ var optab = []Optab{
{AMVN, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0},
{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{AAND, C_NCON, C_REG, C_REG, 114, 4, 0, 0, 0, C_SBIT},
{AAND, C_NCON, C_NONE, C_REG, 114, 4, 0, 0, 0, C_SBIT},
{AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0},
@ -2553,6 +2553,21 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
c.ctxt.Diag("illegal mb option:\n%v", p)
}
o1 |= mbop
case 114: /* AND $C_NCON, [R], R -> BIC $~C_NCON, [R], R */
// AND with an immediate that is not encodable as a rotated 8-bit
// value, but whose bitwise complement is, can be emitted as a
// single BIC instruction without a scratch register. This avoids
// the MVN+AND synthesis used by case 13, which clobbers REGTMP.
c.aclass(&p.From)
o1 = c.oprrr(p, ABIC, int(p.Scond))
o1 |= uint32(immrot(^uint32(c.instoffset)))
rt := int(p.To.Reg)
r := int(p.Reg)
if r == 0 {
r = rt
}
o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12
}
out[0] = o1