cmd/compile: intrinsify atomic And/Or on arm64

The atomic And/Or operators were added by the CL 528797,
the compiler does not intrinsify them, this CL does it for
arm64.

Also, for the existing atomicAnd/Or operations, the updated
value are not used, but at that time we need a register to
temporarily hold it. Now that we have v.RegTmp, the new value
is not needed anymore. This CL changes it.

The other change is that the existing operations don't use their
result, but now we need the old value and not the new value for
the result.

And this CL alias all of the And/Or operations into sync/atomic
package.

Peformance on an ARMv8.1 machine:
                      old.txt       new.txt
                      sec/op         sec/op         vs base
And32-160            8.716n ± 0%    4.771n ± 1%  -45.26% (p=0.000 n=10)
And32Parallel-160    30.58n ± 2%   26.45n ± 4% -13.49% (p=0.000 n=10)
And64-160            8.750n ± 1%    4.754n ± 0%  -45.67% (p=0.000 n=10)
And64Parallel-160    29.40n ± 3%    25.55n ± 5%  -13.11% (p=0.000 n=10)
Or32-160             8.847n ± 1%    4.754±1%  -46.26% (p=0.000 n=10)
Or32Parallel-160     30.75n ± 3%    26.10n ± 4%  -15.14% (p=0.000 n=10)
Or64-160             8.825n ± 1%    4.766n ± 0%  -46.00% (p=0.000 n=10)
Or64Parallel-160     30.52n ± 5%    25.89n ± 6%  -15.17% (p=0.000 n=10)

For #61395

Change-Id: Ib1d1ac83f7f67dcf67f74d003fadb0f80932b826
Reviewed-on: https://go-review.googlesource.com/c/go/+/584715
Auto-Submit: Austin Clements <austin@google.com>
TryBot-Bypass: Austin Clements <austin@google.com>
Reviewed-by: Austin Clements <austin@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Fannie Zhang <Fannie.Zhang@arm.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
fanzha02 2024-04-10 08:45:02 +00:00 committed by Gopher Robot
parent a5339da341
commit 63c1e141bc
7 changed files with 269 additions and 260 deletions

View file

@ -781,23 +781,30 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p3.To.Type = obj.TYPE_REG
p3.To.Reg = out
case ssa.OpARM64LoweredAtomicAnd8,
case ssa.OpARM64LoweredAtomicAnd64,
ssa.OpARM64LoweredAtomicOr64,
ssa.OpARM64LoweredAtomicAnd32,
ssa.OpARM64LoweredAtomicOr8,
ssa.OpARM64LoweredAtomicOr32:
// LDAXRB/LDAXRW (Rarg0), Rout
// AND/OR Rarg1, Rout
// STLXRB/STLXRB Rout, (Rarg0), Rtmp
ssa.OpARM64LoweredAtomicOr32,
ssa.OpARM64LoweredAtomicAnd8,
ssa.OpARM64LoweredAtomicOr8:
// LDAXR[BW] (Rarg0), Rout
// AND/OR Rarg1, Rout, tmp1
// STLXR[BW] tmp1, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
ld := arm64.ALDAXRB
st := arm64.ASTLXRB
ld := arm64.ALDAXR
st := arm64.ASTLXR
if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
ld = arm64.ALDAXRW
st = arm64.ASTLXRW
}
if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
ld = arm64.ALDAXRB
st = arm64.ASTLXRB
}
r0 := v.Args[0].Reg()
r1 := v.Args[1].Reg()
out := v.Reg0()
tmp := v.RegTmp()
p := s.Prog(ld)
p.From.Type = obj.TYPE_MEM
p.From.Reg = r0
@ -806,11 +813,12 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p1 := s.Prog(v.Op.Asm())
p1.From.Type = obj.TYPE_REG
p1.From.Reg = r1
p1.Reg = out
p1.To.Type = obj.TYPE_REG
p1.To.Reg = out
p1.To.Reg = tmp
p2 := s.Prog(st)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = out
p2.From.Reg = tmp
p2.To.Type = obj.TYPE_MEM
p2.To.Reg = r0
p2.RegTo2 = arm64.REGTMP
@ -819,9 +827,14 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p3.From.Reg = arm64.REGTMP
p3.To.Type = obj.TYPE_BRANCH
p3.To.SetTarget(p)
case ssa.OpARM64LoweredAtomicAnd8Variant,
ssa.OpARM64LoweredAtomicAnd32Variant:
atomic_clear := arm64.ALDCLRALW
ssa.OpARM64LoweredAtomicAnd32Variant,
ssa.OpARM64LoweredAtomicAnd64Variant:
atomic_clear := arm64.ALDCLRALD
if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
atomic_clear = arm64.ALDCLRALW
}
if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
atomic_clear = arm64.ALDCLRALB
}
@ -836,7 +849,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = arm64.REGTMP
// LDCLRALW Rtemp, (Rarg0), Rout
// LDCLRAL[BDW] Rtemp, (Rarg0), Rout
p1 := s.Prog(atomic_clear)
p1.From.Type = obj.TYPE_REG
p1.From.Reg = arm64.REGTMP
@ -844,16 +857,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p1.To.Reg = r0
p1.RegTo2 = out
// AND Rarg1, Rout
p2 := s.Prog(arm64.AAND)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = r1
p2.To.Type = obj.TYPE_REG
p2.To.Reg = out
case ssa.OpARM64LoweredAtomicOr8Variant,
ssa.OpARM64LoweredAtomicOr32Variant:
atomic_or := arm64.ALDORALW
ssa.OpARM64LoweredAtomicOr32Variant,
ssa.OpARM64LoweredAtomicOr64Variant:
atomic_or := arm64.ALDORALD
if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
atomic_or = arm64.ALDORALW
}
if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
atomic_or = arm64.ALDORALB
}
@ -861,7 +871,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
r1 := v.Args[1].Reg()
out := v.Reg0()
// LDORALW Rarg1, (Rarg0), Rout
// LDORAL[BDW] Rarg1, (Rarg0), Rout
p := s.Prog(atomic_or)
p.From.Type = obj.TYPE_REG
p.From.Reg = r1
@ -869,13 +879,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = r0
p.RegTo2 = out
// ORR Rarg1, Rout
p2 := s.Prog(arm64.AORR)
p2.From.Type = obj.TYPE_REG
p2.From.Reg = r1
p2.To.Type = obj.TYPE_REG
p2.To.Reg = out
case ssa.OpARM64MOVBreg,
ssa.OpARM64MOVBUreg,
ssa.OpARM64MOVHreg,

View file

@ -579,11 +579,11 @@
(AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...)
(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
// Currently the updated value is not used, but we need a register to temporarily hold it.
(AtomicAnd(8|32) ptr val mem) => (Select1 (LoweredAtomicAnd(8|32) ptr val mem))
(AtomicOr(8|32) ptr val mem) => (Select1 (LoweredAtomicOr(8|32) ptr val mem))
(AtomicAnd(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicAnd(8|32)Variant ptr val mem))
(AtomicOr(8|32)Variant ptr val mem) => (Select1 (LoweredAtomicOr(8|32)Variant ptr val mem))
// Return old contents.
(AtomicAnd(64|32|8) ...) => (LoweredAtomicAnd(64|32|8) ...)
(AtomicOr(64|32|8) ...) => (LoweredAtomicOr(64|32|8) ...)
(AtomicAnd(64|32|8)Variant ...) => (LoweredAtomicAnd(64|32|8)Variant ...)
(AtomicOr(64|32|8)Variant ...) => (LoweredAtomicOr(64|32|8)Variant ...)
// Write barrier.
(WB ...) => (LoweredWB ...)

View file

@ -707,29 +707,31 @@ func init() {
{name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
// *arg0 &= (|=) arg1. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
// LDAXR (Rarg0), Rout
// AND/OR Rarg1, Rout
// STLXR Rout, (Rarg0), Rtmp
// AND/OR Rarg1, Rout, tempReg
// STLXR tempReg, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
{name: "LoweredAtomicAnd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
{name: "LoweredAtomicOr64", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true, needIntTemp: true},
// atomic and/or variant.
// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
// *arg0 &= (|=) arg1. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
// AND:
// MNV Rarg1, Rtemp
// LDANDALB Rtemp, (Rarg0), Rout
// AND Rarg1, Rout
// OR:
// LDORALB Rarg1, (Rarg0), Rout
// ORR Rarg1, Rout
{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAnd64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=mem, auxint=# of buffer entries needed
// It saves all GP registers if necessary,

View file

@ -609,25 +609,31 @@ var genericOps = []opData{
{name: "AtomicCompareAndSwap32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory.
{name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
{name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
{name: "AtomicAnd8", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr8", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicAnd64", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicAnd32", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr64", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr32", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
// Atomic operation variants
// These variants have the same semantics as above atomic operations.
// But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
// Currently, they are used on ARM64 only.
// On ARM64, these are used when the LSE hardware feature is avaliable (either known at compile time or detected at runtime). If LSE is not avaliable,
// then the basic atomic oprations are used instead.
// These are not currently used on any other platform.
{name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
{name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
{name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
{name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
{name: "AtomicAnd8Variant", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr8Variant", argLength: 3, typ: "(Uint8, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicAnd64Variant", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr64Variant", argLength: 3, typ: "(Uint64, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicAnd32Variant", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicOr32Variant", argLength: 3, typ: "(Uint32, Mem)", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns old contents of *arg0 and new memory.
// Publication barrier
{name: "PubBarrier", argLength: 1, hasSideEffects: true}, // Do data barrier. arg0=memory.

View file

@ -1717,12 +1717,16 @@ const (
OpARM64LoweredAtomicCas64Variant
OpARM64LoweredAtomicCas32Variant
OpARM64LoweredAtomicAnd8
OpARM64LoweredAtomicAnd32
OpARM64LoweredAtomicOr8
OpARM64LoweredAtomicAnd64
OpARM64LoweredAtomicOr64
OpARM64LoweredAtomicAnd32
OpARM64LoweredAtomicOr32
OpARM64LoweredAtomicAnd8Variant
OpARM64LoweredAtomicAnd32Variant
OpARM64LoweredAtomicOr8Variant
OpARM64LoweredAtomicAnd64Variant
OpARM64LoweredAtomicOr64Variant
OpARM64LoweredAtomicAnd32Variant
OpARM64LoweredAtomicOr32Variant
OpARM64LoweredWB
OpARM64LoweredPanicBoundsA
@ -3226,8 +3230,10 @@ const (
OpAtomicCompareAndSwap64
OpAtomicCompareAndSwapRel32
OpAtomicAnd8
OpAtomicAnd32
OpAtomicOr8
OpAtomicAnd64
OpAtomicAnd32
OpAtomicOr64
OpAtomicOr32
OpAtomicAdd32Variant
OpAtomicAdd64Variant
@ -3236,8 +3242,10 @@ const (
OpAtomicCompareAndSwap32Variant
OpAtomicCompareAndSwap64Variant
OpAtomicAnd8Variant
OpAtomicAnd32Variant
OpAtomicOr8Variant
OpAtomicAnd64Variant
OpAtomicOr64Variant
OpAtomicAnd32Variant
OpAtomicOr32Variant
OpPubBarrier
OpClobber
@ -23000,24 +23008,7 @@ var opcodeTable = [...]opInfo{
name: "LoweredAtomicAnd8",
argLen: 3,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: arm64.AAND,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicAnd32",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
@ -23036,6 +23027,7 @@ var opcodeTable = [...]opInfo{
name: "LoweredAtomicOr8",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
@ -23050,10 +23042,68 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredAtomicAnd64",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: arm64.AAND,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicOr64",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: arm64.AORR,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicAnd32",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
asm: arm64.AAND,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicOr32",
argLen: 3,
resultNotInArgs: true,
needIntTemp: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
@ -23086,7 +23136,23 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredAtomicAnd32Variant",
name: "LoweredAtomicOr8Variant",
argLen: 3,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicAnd64Variant",
argLen: 3,
resultNotInArgs: true,
faultOnNilArg0: true,
@ -23103,7 +23169,7 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "LoweredAtomicOr8Variant",
name: "LoweredAtomicOr64Variant",
argLen: 3,
resultNotInArgs: true,
faultOnNilArg0: true,
@ -23118,6 +23184,23 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredAtomicAnd32Variant",
argLen: 3,
resultNotInArgs: true,
faultOnNilArg0: true,
hasSideEffects: true,
unsafePoint: true,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "LoweredAtomicOr32Variant",
argLen: 3,
@ -40649,6 +40732,18 @@ var opcodeTable = [...]opInfo{
hasSideEffects: true,
generic: true,
},
{
name: "AtomicOr8",
argLen: 3,
hasSideEffects: true,
generic: true,
},
{
name: "AtomicAnd64",
argLen: 3,
hasSideEffects: true,
generic: true,
},
{
name: "AtomicAnd32",
argLen: 3,
@ -40656,7 +40751,7 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
name: "AtomicOr8",
name: "AtomicOr64",
argLen: 3,
hasSideEffects: true,
generic: true,
@ -40710,13 +40805,25 @@ var opcodeTable = [...]opInfo{
generic: true,
},
{
name: "AtomicAnd32Variant",
name: "AtomicOr8Variant",
argLen: 3,
hasSideEffects: true,
generic: true,
},
{
name: "AtomicOr8Variant",
name: "AtomicAnd64Variant",
argLen: 3,
hasSideEffects: true,
generic: true,
},
{
name: "AtomicOr64Variant",
argLen: 3,
hasSideEffects: true,
generic: true,
},
{
name: "AtomicAnd32Variant",
argLen: 3,
hasSideEffects: true,
generic: true,

View file

@ -470,13 +470,23 @@ func rewriteValueARM64(v *Value) bool {
v.Op = OpARM64LoweredAtomicAdd64Variant
return true
case OpAtomicAnd32:
return rewriteValueARM64_OpAtomicAnd32(v)
v.Op = OpARM64LoweredAtomicAnd32
return true
case OpAtomicAnd32Variant:
return rewriteValueARM64_OpAtomicAnd32Variant(v)
v.Op = OpARM64LoweredAtomicAnd32Variant
return true
case OpAtomicAnd64:
v.Op = OpARM64LoweredAtomicAnd64
return true
case OpAtomicAnd64Variant:
v.Op = OpARM64LoweredAtomicAnd64Variant
return true
case OpAtomicAnd8:
return rewriteValueARM64_OpAtomicAnd8(v)
v.Op = OpARM64LoweredAtomicAnd8
return true
case OpAtomicAnd8Variant:
return rewriteValueARM64_OpAtomicAnd8Variant(v)
v.Op = OpARM64LoweredAtomicAnd8Variant
return true
case OpAtomicCompareAndSwap32:
v.Op = OpARM64LoweredAtomicCas32
return true
@ -514,13 +524,23 @@ func rewriteValueARM64(v *Value) bool {
v.Op = OpARM64LDAR
return true
case OpAtomicOr32:
return rewriteValueARM64_OpAtomicOr32(v)
v.Op = OpARM64LoweredAtomicOr32
return true
case OpAtomicOr32Variant:
return rewriteValueARM64_OpAtomicOr32Variant(v)
v.Op = OpARM64LoweredAtomicOr32Variant
return true
case OpAtomicOr64:
v.Op = OpARM64LoweredAtomicOr64
return true
case OpAtomicOr64Variant:
v.Op = OpARM64LoweredAtomicOr64Variant
return true
case OpAtomicOr8:
return rewriteValueARM64_OpAtomicOr8(v)
v.Op = OpARM64LoweredAtomicOr8
return true
case OpAtomicOr8Variant:
return rewriteValueARM64_OpAtomicOr8Variant(v)
v.Op = OpARM64LoweredAtomicOr8Variant
return true
case OpAtomicStore32:
v.Op = OpARM64STLRW
return true
@ -17783,158 +17803,6 @@ func rewriteValueARM64_OpAddr(v *Value) bool {
return true
}
}
func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicAnd32 ptr val mem)
// result: (Select1 (LoweredAtomicAnd32 ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32, types.NewTuple(typ.UInt32, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicAnd32Variant ptr val mem)
// result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicAnd8 ptr val mem)
// result: (Select1 (LoweredAtomicAnd8 ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8, types.NewTuple(typ.UInt8, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicAnd8Variant ptr val mem)
// result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicOr32 ptr val mem)
// result: (Select1 (LoweredAtomicOr32 ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32, types.NewTuple(typ.UInt32, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicOr32Variant ptr val mem)
// result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicOr8 ptr val mem)
// result: (Select1 (LoweredAtomicOr8 ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8, types.NewTuple(typ.UInt8, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
v_2 := v.Args[2]
v_1 := v.Args[1]
v_0 := v.Args[0]
b := v.Block
typ := &b.Func.Config.Types
// match: (AtomicOr8Variant ptr val mem)
// result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
for {
ptr := v_0
val := v_1
mem := v_2
v.reset(OpSelect1)
v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
v0.AddArg3(ptr, val, mem)
v.AddArg(v0)
return true
}
}
func rewriteValueARM64_OpAvg64u(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View file

@ -4451,16 +4451,16 @@ func InitTables() {
}
}
atomicXchgXaddEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
}
addF("internal/runtime/atomic", "Xchg",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicXchgXaddEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Xchg64",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicXchgXaddEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Xadd",
@ -4479,10 +4479,10 @@ func InitTables() {
sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
addF("internal/runtime/atomic", "Xadd",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicXchgXaddEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Xadd64",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicXchgXaddEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Cas",
@ -4545,21 +4545,29 @@ func InitTables() {
},
sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
atomicAndOrEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind) {
s.vars[memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
}
addF("internal/runtime/atomic", "And8",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, types.TNIL, atomicAndOrEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "And",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TNIL, atomicAndOrEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, types.TUINT8, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Or8",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, types.TNIL, atomicAndOrEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, types.TUINT8, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "And64",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64, ssa.OpAtomicAnd64Variant, types.TUINT64, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "And32",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "And",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Or64",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64, ssa.OpAtomicOr64Variant, types.TUINT64, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Or32",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
addF("internal/runtime/atomic", "Or",
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TNIL, atomicAndOrEmitterARM64),
makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, types.TUINT32, atomicEmitterARM64),
sys.ARM64)
// Aliases for atomic load operations
@ -4609,6 +4617,10 @@ func InitTables() {
alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...)
alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...)
// Aliases for atomic And/Or operations
alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64)
alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64)
/******** math ********/
addF("math", "sqrt",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
@ -5085,6 +5097,17 @@ func InitTables() {
alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...)
alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...)
alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64)
alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64)
alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64)
alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64)
alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64)
alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64)
alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64)
alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64)
alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64)
alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64)
/******** math/big ********/
alias("math/big", "mulWW", "math/bits", "Mul64", p8...)
}