cmd/compile: remove bit operations that modify memory directly

These operations (BT{S,R,C}{Q,L}modify) are quite a bit slower than
other ways of doing the same thing.

Without the BTxmodify operations, there are two fallback ways the compiler
performs these operations: AND/OR/XOR operations directly on memory, or
load-BTx-write sequences. The compiler kinda chooses one arbitrarily
depending on rewrite rule application order. Currently, it uses
load-BTx-write for the Const benchmarks and AND/OR/XOR directly to memory
for the non-Const benchmarks. TBD, someone might investigate which of
the two fallback strategies is really better. For now, they are both
better than BTx ops.

name              old time/op  new time/op  delta
BitSet-8          1.09µs ± 2%  0.64µs ± 5%  -41.60%  (p=0.000 n=9+10)
BitClear-8        1.15µs ± 3%  0.68µs ± 6%  -41.00%  (p=0.000 n=10+10)
BitToggle-8       1.18µs ± 4%  0.73µs ± 2%  -38.36%  (p=0.000 n=10+8)
BitSetConst-8     37.0ns ± 7%  25.8ns ± 2%  -30.24%  (p=0.000 n=10+10)
BitClearConst-8   30.7ns ± 2%  25.0ns ±12%  -18.46%  (p=0.000 n=10+10)
BitToggleConst-8  36.9ns ± 1%  23.8ns ± 3%  -35.46%  (p=0.000 n=9+10)

Fixes #45790
Update #45242

Change-Id: Ie33a72dc139f261af82db15d446cd0855afb4e59
Reviewed-on: https://go-review.googlesource.com/c/go/+/318149
Trust: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Ben Shi <powerman1st@163.com>
This commit is contained in:
Keith Randall 2021-05-07 14:14:39 -07:00 committed by Ben Shi
parent f24eac4771
commit b211fe0058
7 changed files with 81 additions and 1385 deletions

View file

@ -756,7 +756,6 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore, case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify, ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify: ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
@ -804,8 +803,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
} }
fallthrough fallthrough
case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify, case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
sc := v.AuxValAndOff() sc := v.AuxValAndOff()
off := sc.Off64() off := sc.Off64()
val := sc.Val64() val := sc.Val64()

View file

@ -624,14 +624,6 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b) // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y) (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y) (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
(ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
(BTSLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
(BTSQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
(XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) <t> x) mem) =>
(BTCLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) <t> x) mem) =>
(BTCQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Convert ORconst into BTS, if the code gets smaller, with boundary being // Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes). // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@ -654,10 +646,6 @@
=> (BTRQconst [int8(log64(^c))] x) => (BTRQconst [int8(log64(^c))] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128 (ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRLconst [int8(log32(^c))] x) => (BTRLconst [int8(log32(^c))] x)
(ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) <t> x)) mem) =>
(BTRLmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) <t> x)) mem) =>
(BTRQmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Special-case bit patterns on first/last bit. // Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts, // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@ -1126,14 +1114,14 @@
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDQconst [off2] base) mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) => ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) => ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDQconst [off2] base) mem) && ValAndOff(valoff1).canAdd32(off2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem) ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {sym} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {sym} base val mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) => ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDQconst [off2] base) val mem) && is32Bit(int64(off1)+int64(off2)) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {sym} base val mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
// Fold constants into stores. // Fold constants into stores.
(MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) => (MOVQstore [off] {sym} ptr (MOVQconst [c]) mem) && validVal(c) =>
@ -1181,18 +1169,18 @@
((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem) ((ADD|SUB|MUL|DIV)SDload [off1] {sym1} val (LEAQ [off2] {sym2} base) mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem) ((ADD|SUB|MUL|DIV)SDload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) ((ADD|AND|OR|XOR)Qconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|AND|OR|XOR)Qconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem) ((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAQ [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) => && ValAndOff(valoff1).canAdd32(off2) && canMergeSym(sym1, sym2) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem) ((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).addOffset32(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAQ [off2] {sym2} base) val mem)
&& is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
// fold LEAQs together // fold LEAQs together
(LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) => (LEAQ [off1] {sym1} (LEAQ [off2] {sym2} x)) && is32Bit(int64(off1)+int64(off2)) && canMergeSym(sym1, sym2) =>
@ -2078,13 +2066,9 @@
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem) ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((BTC|BTR|BTS)Lmodify [off] {sym} ptr (ANDLconst <t> [31] x) mem)
(MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => (MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem) ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
(MOVQstore {sym} [off] ptr y:((BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) <t> x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((BTC|BTR|BTS)Qmodify [off] {sym} ptr (ANDQconst <t> [63] x) mem)
// Merge ADDQconst and LEAQ into atomic loads. // Merge ADDQconst and LEAQ into atomic loads.
(MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) => (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
@ -2138,12 +2122,12 @@
(MOVWQZX (MOVBQZX x)) => (MOVBQZX x) (MOVWQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVBQZX (MOVBQZX x)) => (MOVBQZX x) (MOVBQZX (MOVBQZX x)) => (MOVBQZX x)
(MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) (MOVQstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Qconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) => && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) ((ADD|AND|OR|XOR)Qconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
(MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) (MOVLstore [off] {sym} ptr a:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem)
&& isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) => && isSamePtr(ptr, ptr2) && a.Uses == 1 && l.Uses == 1 && clobber(l, a) =>
((ADD|AND|OR|XOR|BTC|BTR|BTS)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem) ((ADD|AND|OR|XOR)Lconstmodify {sym} [makeValAndOff(int32(c),off)] ptr mem)
// float <-> int register moves, with no conversion. // float <-> int register moves, with no conversion.
// These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}. // These come up when compiling math.{Float{32,64}bits,Float{32,64}frombits}.

View file

@ -362,25 +362,6 @@ func init() {
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32 {name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64 {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
// direct bit operation on memory operand
//
// Note that these operations do not mask the bit offset (arg1), and will write beyond their expected
// bounds if that argument is larger than 64/32 (for BT*Q and BT*L, respectively). If the compiler
// cannot prove that arg1 is in range, it must be explicitly masked (see e.g. the patterns that produce
// BT*modify from (MOVstore (BT* (MOVLload ptr mem) x) mem)).
{name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTSLmodify", argLength: 3, reg: gpstore, asm: "BTSL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTRQmodify", argLength: 3, reg: gpstore, asm: "BTRQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 64-bit arg0+auxint+aux, arg2=mem
{name: "BTRLmodify", argLength: 3, reg: gpstore, asm: "BTRL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit arg1 in 32-bit arg0+auxint+aux, arg2=mem
{name: "BTCQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTCLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTCL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTSLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTSL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRQconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRQ", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 64-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "BTRLconstmodify", argLength: 2, reg: gpstoreconst, asm: "BTRL", aux: "SymValAndOff", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // reset bit ValAndOff(AuxInt).Val() in 32-bit arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0 {name: "TESTW", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTW", typ: "Flags"}, // (arg0 & arg1) compare to 0

View file

@ -692,18 +692,6 @@ const (
OpAMD64BTRQconst OpAMD64BTRQconst
OpAMD64BTSLconst OpAMD64BTSLconst
OpAMD64BTSQconst OpAMD64BTSQconst
OpAMD64BTCQmodify
OpAMD64BTCLmodify
OpAMD64BTSQmodify
OpAMD64BTSLmodify
OpAMD64BTRQmodify
OpAMD64BTRLmodify
OpAMD64BTCQconstmodify
OpAMD64BTCLconstmodify
OpAMD64BTSQconstmodify
OpAMD64BTSLconstmodify
OpAMD64BTRQconstmodify
OpAMD64BTRLconstmodify
OpAMD64TESTQ OpAMD64TESTQ
OpAMD64TESTL OpAMD64TESTL
OpAMD64TESTW OpAMD64TESTW
@ -8521,180 +8509,6 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "BTCQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRLmodify",
auxType: auxSymOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{1, 49151}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R15
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTCLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTSLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRQconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{
name: "BTRLconstmodify",
auxType: auxSymValAndOff,
argLen: 2,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 4295032831}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 g R15 SB
},
},
},
{ {
name: "TESTQ", name: "TESTQ",
argLen: 2, argLen: 2,

File diff suppressed because it is too large Load diff

View file

@ -62,3 +62,63 @@ func BenchmarkConstModify(b *testing.B) {
} }
} }
} }
func BenchmarkBitSet(b *testing.B) {
const N = 64 * 8
a := make([]uint64, N/64)
for i := 0; i < b.N; i++ {
for j := uint64(0); j < N; j++ {
a[j/64] |= 1 << (j % 64)
}
}
}
func BenchmarkBitClear(b *testing.B) {
const N = 64 * 8
a := make([]uint64, N/64)
for i := 0; i < b.N; i++ {
for j := uint64(0); j < N; j++ {
a[j/64] &^= 1 << (j % 64)
}
}
}
func BenchmarkBitToggle(b *testing.B) {
const N = 64 * 8
a := make([]uint64, N/64)
for i := 0; i < b.N; i++ {
for j := uint64(0); j < N; j++ {
a[j/64] ^= 1 << (j % 64)
}
}
}
func BenchmarkBitSetConst(b *testing.B) {
const N = 64
a := make([]uint64, N)
for i := 0; i < b.N; i++ {
for j := range a {
a[j] |= 1 << 37
}
}
}
func BenchmarkBitClearConst(b *testing.B) {
const N = 64
a := make([]uint64, N)
for i := 0; i < b.N; i++ {
for j := range a {
a[j] &^= 1 << 37
}
}
}
func BenchmarkBitToggleConst(b *testing.B) {
const N = 64
a := make([]uint64, N)
for i := 0; i < b.N; i++ {
for j := range a {
a[j] ^= 1 << 37
}
}
}

View file

@ -270,18 +270,6 @@ func bitOpOnMem(a []uint32, b, c, d uint32) {
a[1] |= 220 a[1] |= 220
// amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)` // amd64:`XORL\s[$]240,\s8\([A-Z][A-Z0-9]+\)`
a[2] ^= 240 a[2] ^= 240
// amd64:`BTRL\s[$]15,\s12\([A-Z][A-Z0-9]+\)`,-`ANDL`
a[3] &= 0xffff7fff
// amd64:`BTSL\s[$]14,\s16\([A-Z][A-Z0-9]+\)`,-`ORL`
a[4] |= 0x4000
// amd64:`BTCL\s[$]13,\s20\([A-Z][A-Z0-9]+\)`,-`XORL`
a[5] ^= 0x2000
// amd64:`BTRL\s[A-Z][A-Z0-9]+,\s24\([A-Z][A-Z0-9]+\)`
a[6] &^= 1 << (b & 31)
// amd64:`BTSL\s[A-Z][A-Z0-9]+,\s28\([A-Z][A-Z0-9]+\)`
a[7] |= 1 << (c & 31)
// amd64:`BTCL\s[A-Z][A-Z0-9]+,\s32\([A-Z][A-Z0-9]+\)`
a[8] ^= 1 << (d & 31)
} }
func bitcheckMostNegative(b uint8) bool { func bitcheckMostNegative(b uint8) bool {