mirror of
https://github.com/golang/go.git
synced 2026-06-27 19:30:52 +00:00
cmd/internal/obj/arm: use single BIC for AND with negative-rotated immediate
When AND $imm, Rn, Rd is given a constant whose bitwise complement fits in a rotated 8-bit ARM immediate (i.e. classified as C_NCON), the assembler synthesized the operation as MVN $~imm, R11 followed by AND R11, Rn, Rd. This silently clobbered REGTMP (R11) for an operation that has a one-instruction encoding: BIC $~imm, Rn, Rd. Within Go's internal ABI this synthesis is technically legal: R11 is the documented REGTMP, reserved for assembler and linker scratch, and Go-compiled callers do not assume its value is preserved across arbitrary instructions. So this is a quality-of-implementation improvement, not a correctness bug fix for first-party Go code. The rewrite is unconditional within the C_NCON path: the class is only assigned when immrot(^imm) succeeds, which is exactly the condition for BIC to encode the immediate directly. AND.S maps to BIC.S; both set N from bit 31 and Z from result==0, leave V unchanged, and the C flag from a rotated immediate is the same in either form. Constants that fall through to C_LCON (literal pool) are unaffected. It also stops surprising hand-written assembly ported in from other toolchains. Outside Go's ABI, on the standard ARM ABI used by GAS/GCC/clang/etc., R11 is fp -- an ordinary callee-saved register that hand-written code routinely keeps live across instructions. Code ported from those toolchains (e.g. CRYPTOGAMS, OpenSSL) reads as plain AND but, with the old synthesis, silently expands to two instructions and trashes R11 in between. After this change a single AND in Plan 9 syntax encodes as a single ARM instruction, matching what GAS produces for the same source. The Go compiler does not currently emit AND with a C_NCON immediate on ARM, so this only affects hand-written .s files.
Building cmd/go for GOARCH=arm hits exactly three call sites:

    internal/bytealg/compare_arm.s:52 AND $0xfffffffc, R6, R8 -> BIC $3, R6, R8
    internal/bytealg/equal_arm.s:61 AND $0xfffffffc, R1, R6 -> BIC $3, R1, R6
    runtime/asm_arm.s:105 AND $~7, R13 -> BIC $7, R13

Change-Id: I0194961917c8a5b3fb5075f787bf8d1e4020ab40
Reviewed-on: https://go-review.googlesource.com/c/go/+/774862
TryBot-Bypass: Brad Fitzpatrick <bradfitz@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
parent
11a3b27b91
commit
358cf41413
2 changed files with 34 additions and 6 deletions
21
src/cmd/asm/internal/asm/testdata/arm.s
vendored
21
src/cmd/asm/internal/asm/testdata/arm.s
vendored
|
|
@ -1089,10 +1089,23 @@ jmp_label_3:
|
|||
RSC $0xffffff4b, R5 // RSC $4294967115, R5 // b4b0e0e30b50e5e0
|
||||
RSC.S $0xffffffb5, R2, R3 // RSC.S $4294967221, R2, R3 // 4ab0e0e30b30f2e0
|
||||
RSC.S $0xffffff4a, R5 // RSC.S $4294967114, R5 // b5b0e0e30b50f5e0
|
||||
AND $0xffffffaa, R2, R3 // AND $4294967210, R2, R3 // 55b0e0e30b3002e0
|
||||
AND $0xffffff55, R5 // AND $4294967125, R5 // aab0e0e30b5005e0
|
||||
AND.S $0xffffffab, R2, R3 // AND.S $4294967211, R2, R3 // 54b0e0e30b3012e0
|
||||
AND.S $0xffffff54, R5 // AND.S $4294967124, R5 // abb0e0e30b5015e0
|
||||
AND $0xffffffaa, R2, R3 // AND $4294967210, R2, R3 // 5530c2e3
|
||||
AND $0xffffff55, R5 // AND $4294967125, R5 // aa50c5e3
|
||||
AND.S $0xffffffab, R2, R3 // AND.S $4294967211, R2, R3 // 5430d2e3
|
||||
AND.S $0xffffff54, R5 // AND.S $4294967124, R5 // ab50d5e3
|
||||
// AND with a negative-rotated immediate: emit a single BIC,
|
||||
// not the two-instruction MVN+AND synthesis (which clobbers R11).
|
||||
AND $0xfffffffc, R0, R1 // AND $4294967292, R0, R1 // 0310c0e3
|
||||
AND $0xfffffff0, R0, R1 // AND $4294967280, R0, R1 // 0f10c0e3
|
||||
AND $0xffffff00, R0, R1 // AND $4294967040, R0, R1 // ff10c0e3
|
||||
AND $0xfffffffc, R8 // AND $4294967292, R8 // 0380c8e3
|
||||
AND.S $0xfffffffc, R0, R1 // AND.S $4294967292, R0, R1 // 0310d0e3
|
||||
// AND with an irregular immediate (neither rot8 nor ~rot8 fit)
|
||||
// still goes through the literal-pool synthesis: MOVW pool(PC),
|
||||
// R11; AND R11, Rn, Rd. Encoded form depends on pool placement,
|
||||
// so just round-trip the disassembly.
|
||||
AND $0xaaaaaaaa, R0, R1 // AND $2863311530, R0, R1
|
||||
AND.S $0xaaaaaaaa, R0, R1 // AND.S $2863311530, R0, R1
|
||||
ORR $0xffffffaa, R2, R3 // ORR $4294967210, R2, R3 // 55b0e0e30b3082e1
|
||||
ORR $0xffffff55, R5 // ORR $4294967125, R5 // aab0e0e30b5085e1
|
||||
ORR.S $0xffffffab, R2, R3 // ORR.S $4294967211, R2, R3 // 54b0e0e30b3092e1
|
||||
|
|
|
|||
|
|
@ -141,8 +141,8 @@ var optab = []Optab{
|
|||
{AMVN, C_NCON, C_NONE, C_REG, 12, 4, 0, 0, 0, 0},
|
||||
{AADD, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{AADD, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{AAND, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{AAND, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{AAND, C_NCON, C_REG, C_REG, 114, 4, 0, 0, 0, C_SBIT},
|
||||
{AAND, C_NCON, C_NONE, C_REG, 114, 4, 0, 0, 0, C_SBIT},
|
||||
{AORR, C_NCON, C_REG, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{AORR, C_NCON, C_NONE, C_REG, 13, 8, 0, 0, 0, C_SBIT},
|
||||
{ACMP, C_NCON, C_REG, C_NONE, 13, 8, 0, 0, 0, 0},
|
||||
|
|
@ -2553,6 +2553,21 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
|
|||
c.ctxt.Diag("illegal mb option:\n%v", p)
|
||||
}
|
||||
o1 |= mbop
|
||||
|
||||
case 114: /* AND $C_NCON, [R], R -> BIC $~C_NCON, [R], R */
|
||||
// AND with an immediate that is not encodable as a rotated 8-bit
|
||||
// value, but whose bitwise complement is, can be emitted as a
|
||||
// single BIC instruction without a scratch register. This avoids
|
||||
// the MVN+AND synthesis used by case 13, which clobbers REGTMP.
|
||||
c.aclass(&p.From)
|
||||
o1 = c.oprrr(p, ABIC, int(p.Scond))
|
||||
o1 |= uint32(immrot(^uint32(c.instoffset)))
|
||||
rt := int(p.To.Reg)
|
||||
r := int(p.Reg)
|
||||
if r == 0 {
|
||||
r = rt
|
||||
}
|
||||
o1 |= (uint32(r)&15)<<16 | (uint32(rt)&15)<<12
|
||||
}
|
||||
|
||||
out[0] = o1
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue