cmd/compile: combine more 32 bit shift and mask operations on ppc64

Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when the mask m
and the shift value produce a constant which can be encoded into an
RLWINM instruction.

Combine (CLRLSLDI (SRWconst x)) if the combining of the underlying rotate
masks produces a constant which can be encoded into RLWINM.

Likewise for (SLDconst (SRWconst x)) and (CLRLSLDI (RLWINM x)).

Combine rotate word + and operations which can be encoded as a single
RLWINM/RLWNM instruction.

The most notable performance improvements arise from the crypto
benchmarks below (GOPPC64=power8 on ppc64le/linux):

pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt                               52.2µs ± 0%    47.5µs ± 0%  -8.88%
ExpandKey                                       44.4µs ± 0%    40.3µs ± 0%  -9.15%

pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key                                             57.6ms ± 0%    52.3ms ± 0%  -9.13%

pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal                                           90.9ms ± 0%    82.6ms ± 0%  -9.13%
DefaultCost                                     91.0ms ± 0%    82.7ms ± 0%  -9.12%

Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
Paul E. Murphy 2020-10-23 12:12:34 -05:00 committed by Lynn Boger
parent e3bb53a768
commit c3c6fbf314
9 changed files with 900 additions and 21 deletions

View file

@ -649,6 +649,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
// Auxint holds encoded rotate + mask
case ssa.OpPPC64RLWINM, ssa.OpPPC64RLWMI:
rot, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
p := s.Prog(v.Op.Asm())
p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
p.Reg = v.Args[0].Reg()
p.From = obj.Addr{Type: obj.TYPE_CONST, Offset: int64(rot)}
p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
// Auxint holds mask
case ssa.OpPPC64RLWNM:
_, _, _, mask := ssa.DecodePPC64RotateMask(v.AuxInt)
p := s.Prog(v.Op.Asm())
p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
p.Reg = v.Args[0].Reg()
p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()}
p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: int64(mask)})
case ssa.OpPPC64MADDLD:
r := v.Reg()
r1 := v.Args[0].Reg()

View file

@ -150,6 +150,31 @@
(ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31])
(ROTL x (MOVDconst [c])) => (ROTLconst x [c&63])
// Combine rotate and mask operations
(ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
(AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
(ANDconst [m] (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
(AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
// Note, any rotated word bitmask is still a valid word bitmask.
(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
(ROTLWconst [r] (ANDconst [m] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
(ANDconst [m] (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
(ANDconst [m] (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
(SRWconst (ANDconst [m] x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
(SRWconst (ANDconst [m] x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
// Merge shift right + shift left and clear left (e.g for a table lookup)
(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
// large constant shifts
(Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 => (MOVDconst [0])

View file

@ -137,6 +137,7 @@ func init() {
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
@ -227,6 +228,10 @@ func init() {
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)

View file

@ -1871,6 +1871,9 @@ const (
OpPPC64ROTLconst
OpPPC64ROTLWconst
OpPPC64EXTSWSLconst
OpPPC64RLWINM
OpPPC64RLWNM
OpPPC64RLWMI
OpPPC64CNTLZD
OpPPC64CNTLZW
OpPPC64CNTTZD
@ -24971,6 +24974,51 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "RLWINM",
auxType: auxInt64,
argLen: 1,
asm: ppc64.ARLWNM,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "RLWNM",
auxType: auxInt64,
argLen: 2,
asm: ppc64.ARLWNM,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "RLWMI",
auxType: auxInt64,
argLen: 2,
resultInArg0: true,
asm: ppc64.ARLWMI,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "CNTLZD",
argLen: 1,

View file

@ -1381,6 +1381,71 @@ func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
}
// isPPC64WordRotateMask reports whether the low 32 bits of v64 can be
// encoded as the mask of a rlwinm like operation. A valid mask is a single
// run of ones, which may also extend from the msb and wrap around to the
// lsb. That is, the valid masks are 32 bit strings of the form
// 0..01..10..0 or 1..10..01..1 or 1...1. The all-zero word is rejected.
func isPPC64WordRotateMask(v64 int64) bool {
	word := uint32(v64)
	if word == 0 {
		return false
	}
	// Adding the rightmost set bit to w carries through the lowest run of
	// ones. If that run was the only one, no bit of w survives the AND.
	singleRun := func(w uint32) bool {
		lowest := w & -w
		return w&(w+lowest) == 0
	}
	// A wrapping mask is exactly one whose complement is a single run.
	return singleRun(word) || singleRun(^word)
}
// encodePPC64RotateMask compresses a rotate amount and a mask into a single
// value of the form me | mb<<8 | rotate<<16 | nbits<<24, where me and mb can
// be used to regenerate the input mask. When nbits is 32 only the low word of
// mask is examined; any other nbits value is treated as 64.
//
// It panics if mask is 0 or -1, or if rotate >= nbits.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
	if mask == 0 || ^mask == 0 || rotate >= nbits {
		panic("Invalid PPC64 rotate mask")
	}

	// Locate the boundaries of the run of ones in the mask and in its
	// complement; the latter are needed for masks that wrap around.
	var begin, end, invBegin, invEnd int
	switch nbits {
	case 32:
		word, inv := uint32(mask), ^uint32(mask)
		begin, end = bits.LeadingZeros32(word), 32-bits.TrailingZeros32(word)
		invBegin, invEnd = bits.LeadingZeros32(inv), 32-bits.TrailingZeros32(inv)
	default:
		dword, inv := uint64(mask), ^uint64(mask)
		begin, end = bits.LeadingZeros64(dword), 64-bits.TrailingZeros64(dword)
		invBegin, invEnd = bits.LeadingZeros64(inv), 64-bits.TrailingZeros64(inv)
	}

	// A wrapping mask (e.g. bits set at both 0 and 63) is described by the
	// swapped boundaries of its complement.
	if begin == 0 && end == int(nbits) {
		begin, end = invEnd, invBegin
	}

	return int64(end) | int64(begin<<8) | int64(rotate<<16) | int64(nbits<<24)
}
// DecodePPC64RotateMask is the inverse operation of encodePPC64RotateMask.
// It unpacks sauxint into the rotate amount, the mask boundaries and the mask
// itself. The values returned as mb and me satisfy the POWER ISA definition
// of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
	packed := uint64(sauxint)
	// Each component occupies one byte of the packed value.
	byteAt := func(shift uint) int64 {
		return int64((packed >> shift) & 0xFF)
	}
	me, mb, rotate = byteAt(0), byteAt(8), byteAt(16)
	nbits := byteAt(24)

	// Rebuild the mask as the difference of two right-aligned runs of ones.
	belowMB := (uint64(1) << uint(nbits-mb)) - 1
	belowME := (uint64(1) << uint(nbits-me)) - 1
	mask = belowMB ^ belowME
	if mb > me {
		// The mask wraps around, so the run computed above is its complement.
		mask = ^mask
	}
	if nbits == 32 {
		mask = uint64(uint32(mask))
	}

	// Fixup ME to match ISA definition. The second argument to MASK(..,me)
	// is inclusive.
	me = (me - 1) & (nbits - 1)
	return rotate, mb, me, mask
}
// This verifies that the mask occupies the
// rightmost bits.
func isPPC64ValidShiftMask(v int64) bool {
@ -1394,6 +1459,78 @@ func getPPC64ShiftMaskLength(v int64) int64 {
return int64(bits.Len64(uint64(v)))
}
// mergePPC64RShiftMask treats a shift right by s of an nbits wide value as a
// rotate followed by a mask, and returns the part of m that overlaps that
// mask (i.e. m with the s topmost of its low nbits bits, and everything
// above them, cleared).
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
	lowOnes := (uint64(1) << uint(nbits)) - 1
	surviving := lowOnes >> uint(s)
	return m & int64(surviving)
}
// mergePPC64AndSrwi combines (ANDconst [m] (SRWconst [s])) into the aux value
// of an equivalent (RLWINM [y]). It returns 0 if the mask that remains after
// the shift is not a valid word rotate mask (which includes the case where
// the shift leaves no bits of m).
func mergePPC64AndSrwi(m, s int64) int64 {
	mask := mergePPC64RShiftMask(m, s, 32)
	if !isPPC64WordRotateMask(mask) {
		return 0
	}
	// A 32 bit shift right by s is a rotate left by 32-s. Reduce the rotate
	// modulo 32 so that s == 0 is encoded as a rotate of 0 instead of 32,
	// which encodePPC64RotateMask rejects with a panic.
	return encodePPC64RotateMask((32-s)&31, mask, 32)
}
// mergePPC64ClrlsldiSrw tests if a shift right feeding into a CLRLSLDI can be
// merged into RLWINM. sld is the aux value of the CLRLSLDI and srw is the
// shift right amount. It returns the encoded RLWINM constant, or 0 if they
// cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
	// Bits that remain populated after the 32 bit shift right.
	srwMask := uint64(0xFFFFFFFF >> uint(srw))
	// For CLRLSLDI, it's more convenient to think of it as a mask of the
	// left bits followed by a rotate left.
	clearMask := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(sld))
	shiftLeft := GetPPC64Shiftsh(sld)

	// Rewrite the mask so that it applies after the final left shift.
	merged := (srwMask & clearMask) << uint(shiftLeft)
	if merged == 0 || uint64(uint32(merged)) != merged {
		// Nothing is left, or the result no longer fits in a word.
		return 0
	}

	rotate := (32 - srw + shiftLeft) & 31 // This can wrap.
	return encodePPC64RotateMask(rotate, int64(merged), 32)
}
// mergePPC64ClrlsldiRlwinm tests if a RLWINM feeding into a CLRLSLDI can be
// merged into a single RLWINM. sld is the aux value of the CLRLSLDI and rlw is
// the encoded aux value of the inner RLWINM. It returns the encoded RLWINM
// constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
	clrlsldi := int64(sld)
	rlwRotate, _, _, rlwMask := DecodePPC64RotateMask(rlw)
	// For CLRLSLDI, it's more convenient to think of it as a mask of the
	// left bits followed by a rotate left.
	clearMask := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(clrlsldi))
	shiftLeft := GetPPC64Shiftsh(clrlsldi)

	// Combine the masks, and adjust for the final left shift.
	merged := (rlwMask & clearMask) << uint(shiftLeft)

	// The result must still be a valid bitmask of <= 32 bits.
	fitsInWord := uint64(uint32(merged)) == merged
	if !fitsInWord || !isPPC64WordRotateMask(int64(merged)) {
		return 0
	}

	rotate := (rlwRotate + shiftLeft) & 31 // This can wrap.
	return encodePPC64RotateMask(rotate, int64(merged), 32)
}
// mergePPC64SldiSrw computes the encoded RLWINM constant from combining
// (SLDconst [sld] (SRWconst [srw] x)). It returns 0 if they cannot be
// combined, which is the case when the left shift is larger than the right
// shift or when the right shift is 32 or more.
func mergePPC64SldiSrw(sld, srw int64) int64 {
	if sld > srw || srw >= 32 {
		return 0
	}
	const allOnes = uint32(0xFFFFFFFF)
	afterRight := allOnes >> uint(srw)
	beforeLeft := allOnes >> uint(sld)
	merged := (afterRight & beforeLeft) << uint(sld)
	rotate := (32 - srw + sld) & 31
	return encodePPC64RotateMask(rotate, int64(merged), 32)
}
// rotateLeft32 rotates the low 32 bits of v left by rotate bits and returns
// the result zero-extended to 64 bits. It is a convenience wrapper for use
// with constant operands.
func rotateLeft32(v, rotate int64) int64 {
	word := uint32(v)
	rotated := bits.RotateLeft32(word, int(rotate))
	return int64(rotated)
}
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {

View file

@ -444,6 +444,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpPPC64ANDN(v)
case OpPPC64ANDconst:
return rewriteValuePPC64_OpPPC64ANDconst(v)
case OpPPC64CLRLSLDI:
return rewriteValuePPC64_OpPPC64CLRLSLDI(v)
case OpPPC64CMP:
return rewriteValuePPC64_OpPPC64CMP(v)
case OpPPC64CMPU:
@ -598,6 +600,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpPPC64ROTL(v)
case OpPPC64ROTLW:
return rewriteValuePPC64_OpPPC64ROTLW(v)
case OpPPC64ROTLWconst:
return rewriteValuePPC64_OpPPC64ROTLWconst(v)
case OpPPC64SLD:
return rewriteValuePPC64_OpPPC64SLD(v)
case OpPPC64SLDconst:
@ -614,6 +618,8 @@ func rewriteValuePPC64(v *Value) bool {
return rewriteValuePPC64_OpPPC64SRD(v)
case OpPPC64SRW:
return rewriteValuePPC64_OpPPC64SRW(v)
case OpPPC64SRWconst:
return rewriteValuePPC64_OpPPC64SRWconst(v)
case OpPPC64SUB:
return rewriteValuePPC64_OpPPC64SUB(v)
case OpPPC64SUBFCconst:
@ -4212,6 +4218,100 @@ func rewriteValuePPC64_OpPPC64ADDconst(v *Value) bool {
func rewriteValuePPC64_OpPPC64AND(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
// match: (AND (MOVDconst [m]) (ROTLWconst [r] x))
// cond: isPPC64WordRotateMask(m)
// result: (RLWINM [encodePPC64RotateMask(r,m,32)] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0.AuxInt)
if v_1.Op != OpPPC64ROTLWconst {
continue
}
r := auxIntToInt64(v_1.AuxInt)
x := v_1.Args[0]
if !(isPPC64WordRotateMask(m)) {
continue
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32))
v.AddArg(x)
return true
}
break
}
// match: (AND (MOVDconst [m]) (ROTLW x r))
// cond: isPPC64WordRotateMask(m)
// result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0.AuxInt)
if v_1.Op != OpPPC64ROTLW {
continue
}
r := v_1.Args[1]
x := v_1.Args[0]
if !(isPPC64WordRotateMask(m)) {
continue
}
v.reset(OpPPC64RLWNM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
v.AddArg2(x, r)
return true
}
break
}
// match: (AND (MOVDconst [m]) (SRWconst x [s]))
// cond: mergePPC64RShiftMask(m,s,32) == 0
// result: (MOVDconst [0])
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0.AuxInt)
if v_1.Op != OpPPC64SRWconst {
continue
}
s := auxIntToInt64(v_1.AuxInt)
if !(mergePPC64RShiftMask(m, s, 32) == 0) {
continue
}
v.reset(OpPPC64MOVDconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
break
}
// match: (AND (MOVDconst [m]) (SRWconst x [s]))
// cond: mergePPC64AndSrwi(m,s) != 0
// result: (RLWINM [mergePPC64AndSrwi(m,s)] x)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
if v_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0.AuxInt)
if v_1.Op != OpPPC64SRWconst {
continue
}
s := auxIntToInt64(v_1.AuxInt)
x := v_1.Args[0]
if !(mergePPC64AndSrwi(m, s) != 0) {
continue
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s))
v.AddArg(x)
return true
}
break
}
// match: (AND x (NOR y y))
// result: (ANDN x y)
for {
@ -4347,6 +4447,76 @@ func rewriteValuePPC64_OpPPC64ANDN(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ANDconst [m] (ROTLWconst [r] x))
// cond: isPPC64WordRotateMask(m)
// result: (RLWINM [encodePPC64RotateMask(r,m,32)] x)
for {
m := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64ROTLWconst {
break
}
r := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(isPPC64WordRotateMask(m)) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, m, 32))
v.AddArg(x)
return true
}
// match: (ANDconst [m] (ROTLW x r))
// cond: isPPC64WordRotateMask(m)
// result: (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
for {
m := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64ROTLW {
break
}
r := v_0.Args[1]
x := v_0.Args[0]
if !(isPPC64WordRotateMask(m)) {
break
}
v.reset(OpPPC64RLWNM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(0, m, 32))
v.AddArg2(x, r)
return true
}
// match: (ANDconst [m] (SRWconst x [s]))
// cond: mergePPC64RShiftMask(m,s,32) == 0
// result: (MOVDconst [0])
for {
m := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64SRWconst {
break
}
s := auxIntToInt64(v_0.AuxInt)
if !(mergePPC64RShiftMask(m, s, 32) == 0) {
break
}
v.reset(OpPPC64MOVDconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
// match: (ANDconst [m] (SRWconst x [s]))
// cond: mergePPC64AndSrwi(m,s) != 0
// result: (RLWINM [mergePPC64AndSrwi(m,s)] x)
for {
m := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64SRWconst {
break
}
s := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(mergePPC64AndSrwi(m, s) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m, s))
v.AddArg(x)
return true
}
// match: (ANDconst [c] (ANDconst [d] x))
// result: (ANDconst [c&d] x)
for {
@ -4511,6 +4681,47 @@ func rewriteValuePPC64_OpPPC64ANDconst(v *Value) bool {
}
return false
}
// rewriteValuePPC64_OpPPC64CLRLSLDI tries the CLRLSLDI rewrite rules against v
// in order. On the first rule whose pattern and condition match, v is reset in
// place to an RLWINM of the inner operand and true is returned. If no rule
// applies, v is left untouched and false is returned.
// NOTE(review): the match/cond/result comment layout suggests this function is
// generated from the rules file; confirm before editing by hand.
func rewriteValuePPC64_OpPPC64CLRLSLDI(v *Value) bool {
v_0 := v.Args[0]
// match: (CLRLSLDI [c] (SRWconst [s] x))
// cond: mergePPC64ClrlsldiSrw(int64(c),s) != 0
// result: (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
for {
c := auxIntToInt32(v.AuxInt)
if v_0.Op != OpPPC64SRWconst {
break
}
s := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(mergePPC64ClrlsldiSrw(int64(c), s) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiSrw(int64(c), s))
v.AddArg(x)
return true
}
// match: (CLRLSLDI [c] i:(RLWINM [s] x))
// cond: mergePPC64ClrlsldiRlwinm(c,s) != 0
// result: (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
for {
c := auxIntToInt32(v.AuxInt)
i := v_0
if i.Op != OpPPC64RLWINM {
break
}
s := auxIntToInt64(i.AuxInt)
x := i.Args[0]
if !(mergePPC64ClrlsldiRlwinm(c, s) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64ClrlsldiRlwinm(c, s))
v.AddArg(x)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64CMP(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -12850,6 +13061,55 @@ func rewriteValuePPC64_OpPPC64ROTLW(v *Value) bool {
}
return false
}
// rewriteValuePPC64_OpPPC64ROTLWconst tries the ROTLWconst rewrite rules
// against v in order. A constant word rotate of a value that was ANDed with a
// word rotate mask is reset in place to a single RLWINM whose mask is the
// original mask rotated by the same amount. It returns true if v was
// rewritten and false otherwise.
// NOTE(review): the match/cond/result comment layout suggests this function is
// generated from the rules file; confirm before editing by hand.
func rewriteValuePPC64_OpPPC64ROTLWconst(v *Value) bool {
v_0 := v.Args[0]
// match: (ROTLWconst [r] (AND (MOVDconst [m]) x))
// cond: isPPC64WordRotateMask(m)
// result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
for {
r := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64AND {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
// AND is tried with its two operands in both orders.
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
if v_0_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(isPPC64WordRotateMask(m)) {
continue
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32))
v.AddArg(x)
return true
}
break
}
// match: (ROTLWconst [r] (ANDconst [m] x))
// cond: isPPC64WordRotateMask(m)
// result: (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
for {
r := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64ANDconst {
break
}
m := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(isPPC64WordRotateMask(m)) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(encodePPC64RotateMask(r, rotateLeft32(m, r), 32))
v.AddArg(x)
return true
}
return false
}
func rewriteValuePPC64_OpPPC64SLD(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]
@ -12870,6 +13130,24 @@ func rewriteValuePPC64_OpPPC64SLD(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64SLDconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SLDconst [l] (SRWconst [r] x))
// cond: mergePPC64SldiSrw(l,r) != 0
// result: (RLWINM [mergePPC64SldiSrw(l,r)] x)
for {
l := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64SRWconst {
break
}
r := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(mergePPC64SldiSrw(l, r) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64SldiSrw(l, r))
v.AddArg(x)
return true
}
// match: (SLDconst [c] z:(MOVBZreg x))
// cond: c < 8 && z.Uses == 1
// result: (CLRLSLDI [newPPC64ShiftAuxInt(c,56,63,64)] x)
@ -13186,6 +13464,96 @@ func rewriteValuePPC64_OpPPC64SRW(v *Value) bool {
}
return false
}
// rewriteValuePPC64_OpPPC64SRWconst tries the SRWconst rewrite rules against v
// in order. A constant word shift right of a masked value (ANDconst, or AND
// with a MOVDconst operand) is reset in place either to the constant 0, when
// no mask bit survives the shift, or to a single RLWINM when the shifted mask
// can be encoded. It returns true if v was rewritten and false otherwise.
// NOTE(review): the match/cond/result comment layout suggests this function is
// generated from the rules file; confirm before editing by hand.
func rewriteValuePPC64_OpPPC64SRWconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SRWconst (ANDconst [m] x) [s])
// cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0
// result: (MOVDconst [0])
for {
s := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64ANDconst {
break
}
m := auxIntToInt64(v_0.AuxInt)
if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) {
break
}
v.reset(OpPPC64MOVDconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
// match: (SRWconst (ANDconst [m] x) [s])
// cond: mergePPC64AndSrwi(m>>uint(s),s) != 0
// result: (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
for {
s := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64ANDconst {
break
}
m := auxIntToInt64(v_0.AuxInt)
x := v_0.Args[0]
if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) {
break
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s))
v.AddArg(x)
return true
}
// match: (SRWconst (AND (MOVDconst [m]) x) [s])
// cond: mergePPC64RShiftMask(m>>uint(s),s,32) == 0
// result: (MOVDconst [0])
for {
s := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64AND {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
// AND is tried with its two operands in both orders.
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
if v_0_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0_0.AuxInt)
if !(mergePPC64RShiftMask(m>>uint(s), s, 32) == 0) {
continue
}
v.reset(OpPPC64MOVDconst)
v.AuxInt = int64ToAuxInt(0)
return true
}
break
}
// match: (SRWconst (AND (MOVDconst [m]) x) [s])
// cond: mergePPC64AndSrwi(m>>uint(s),s) != 0
// result: (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
for {
s := auxIntToInt64(v.AuxInt)
if v_0.Op != OpPPC64AND {
break
}
_ = v_0.Args[1]
v_0_0 := v_0.Args[0]
v_0_1 := v_0.Args[1]
// AND is tried with its two operands in both orders.
for _i0 := 0; _i0 <= 1; _i0, v_0_0, v_0_1 = _i0+1, v_0_1, v_0_0 {
if v_0_0.Op != OpPPC64MOVDconst {
continue
}
m := auxIntToInt64(v_0_0.AuxInt)
x := v_0_1
if !(mergePPC64AndSrwi(m>>uint(s), s) != 0) {
continue
}
v.reset(OpPPC64RLWINM)
v.AuxInt = int64ToAuxInt(mergePPC64AndSrwi(m>>uint(s), s))
v.AddArg(x)
return true
}
break
}
return false
}
func rewriteValuePPC64_OpPPC64SUB(v *Value) bool {
v_1 := v.Args[1]
v_0 := v.Args[0]

View file

@ -36,3 +36,184 @@ func TestSubFlags(t *testing.T) {
t.Errorf("subFlags32(0,1).ult() returned false")
}
}
// TestIsPPC64WordRotateMask checks isPPC64WordRotateMask against contiguous,
// wrapping, all-ones, all-zeros and non-contiguous 32 bit masks.
func TestIsPPC64WordRotateMask(t *testing.T) {
	type maskCase struct {
		input    int64
		expected bool
	}
	cases := []maskCase{
		{0x00000001, true},
		{0x80000001, true},
		{0x80010001, false},
		{0xFFFFFFFA, false},
		{0xF0F0F0F0, false},
		{0xFFFFFFFD, true},
		{0x80000000, true},
		{0x00000000, false},
		{0xFFFFFFFF, true},
		{0x0000FFFF, true},
		{0xFF0000FF, true},
		{0x00FFFF00, true},
	}

	for _, tc := range cases {
		if got := isPPC64WordRotateMask(tc.input); got == tc.expected {
			continue
		}
		t.Errorf("isPPC64WordRotateMask(0x%x) failed", tc.input)
	}
}
// TestEncodeDecodePPC64WordRotateMask checks that encodePPC64RotateMask
// produces the expected packed value for 32 and 64 bit masks, and that
// DecodePPC64RotateMask recovers the rotate, mb, me and mask from it.
func TestEncodeDecodePPC64WordRotateMask(t *testing.T) {
	type roundTrip struct {
		rotate  int64
		mask    uint64
		nbits   int64
		mb      int64
		me      int64
		encoded int64
	}
	cases := []roundTrip{
		{1, 0x00000001, 32, 31, 31, 0x20011f20},
		{2, 0x80000001, 32, 31, 0, 0x20021f01},
		{3, 0xFFFFFFFD, 32, 31, 29, 0x20031f1e},
		{4, 0x80000000, 32, 0, 0, 0x20040001},
		{5, 0xFFFFFFFF, 32, 0, 31, 0x20050020},
		{6, 0x0000FFFF, 32, 16, 31, 0x20061020},
		{7, 0xFF0000FF, 32, 24, 7, 0x20071808},
		{8, 0x00FFFF00, 32, 8, 23, 0x20080818},
		{9, 0x0000000000FFFF00, 64, 40, 55, 0x40092838},
		{10, 0xFFFF000000000000, 64, 0, 15, 0x400A0010},
		{10, 0xFFFF000000000001, 64, 63, 15, 0x400A3f10},
	}

	for idx, tc := range cases {
		encoded := encodePPC64RotateMask(tc.rotate, int64(tc.mask), tc.nbits)
		if encoded != tc.encoded {
			t.Errorf("encodePPC64RotateMask(%d,0x%x,%d) = 0x%x, expected 0x%x", tc.rotate, tc.mask, tc.nbits, encoded, tc.encoded)
		}

		// Decode whatever the encoder produced, even if it was unexpected.
		gotRotate, gotMB, gotME, gotMask := DecodePPC64RotateMask(encoded)
		matches := gotRotate == tc.rotate && gotMB == tc.mb && gotME == tc.me && gotMask == tc.mask
		if !matches {
			t.Errorf("DecodePPC64Failure(Test %d) got (%d, %d, %d, %x) expected (%d, %d, %d, %x)", idx, gotRotate, gotMB, gotME, gotMask, tc.rotate, tc.mb, tc.me, tc.mask)
		}
	}
}
// TestMergePPC64ClrlsldiSrw checks which CLRLSLDI/SRW pairs
// mergePPC64ClrlsldiSrw agrees to merge, and the rotate and mask of the
// resulting RLWINM constant.
func TestMergePPC64ClrlsldiSrw(t *testing.T) {
	type mergeCase struct {
		clrlsldi int32
		srw      int64
		valid    bool
		rotate   int64
		mask     uint64
	}
	cases := []mergeCase{
		// ((x>>4)&0xFF)<<4
		{newPPC64ShiftAuxInt(4, 56, 63, 64), 4, true, 0, 0xFF0},
		// ((x>>4)&0xFFFF)<<4
		{newPPC64ShiftAuxInt(4, 48, 63, 64), 4, true, 0, 0xFFFF0},
		// ((x>>4)&0xFFFF)<<17
		{newPPC64ShiftAuxInt(17, 48, 63, 64), 4, false, 0, 0},
		// ((x>>4)&0xFFFF)<<16
		{newPPC64ShiftAuxInt(16, 48, 63, 64), 4, true, 12, 0xFFFF0000},
		// ((x>>32)&0xFFFF)<<17
		{newPPC64ShiftAuxInt(17, 48, 63, 64), 32, false, 0, 0},
	}

	for idx, tc := range cases {
		merged := mergePPC64ClrlsldiSrw(int64(tc.clrlsldi), tc.srw)
		switch {
		case tc.valid && merged == 0:
			t.Errorf("mergePPC64ClrlsldiSrw(Test %d) did not merge", idx)
		case !tc.valid && merged != 0:
			t.Errorf("mergePPC64ClrlsldiSrw(Test %d) should return 0", idx)
		default:
			r, _, _, m := DecodePPC64RotateMask(merged)
			if tc.rotate != r || tc.mask != m {
				t.Errorf("mergePPC64ClrlsldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", idx, r, m, tc.rotate, tc.mask)
			}
		}
	}
}
// TestMergePPC64ClrlsldiRlwinm checks which CLRLSLDI/RLWINM pairs
// mergePPC64ClrlsldiRlwinm agrees to merge, and the rotate and mask of the
// resulting RLWINM constant.
func TestMergePPC64ClrlsldiRlwinm(t *testing.T) {
	type mergeCase struct {
		clrlsldi int32
		rlwinm   int64
		valid    bool
		rotate   int64
		mask     uint64
	}
	cases := []mergeCase{
		// ((x<<4)&0xFF00)<<4
		{newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(4, 0xFF00, 32), false, 0, 0},
		// ((x>>4)&0xFF)<<4
		{newPPC64ShiftAuxInt(4, 56, 63, 64), encodePPC64RotateMask(28, 0x0FFFFFFF, 32), true, 0, 0xFF0},
		// ((x>>4)&0xFFFF)<<4
		{newPPC64ShiftAuxInt(4, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 0, 0xFFFF0},
		// ((x>>4)&0xFFFF)<<17
		{newPPC64ShiftAuxInt(17, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), false, 0, 0},
		// ((x>>4)&0xFFFF)<<16
		{newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xFFFF, 32), true, 12, 0xFFFF0000},
		// ((x>>4)&0xF000FFFF)<<16
		{newPPC64ShiftAuxInt(16, 48, 63, 64), encodePPC64RotateMask(28, 0xF000FFFF, 32), true, 12, 0xFFFF0000},
	}

	for idx, tc := range cases {
		merged := mergePPC64ClrlsldiRlwinm(tc.clrlsldi, tc.rlwinm)
		switch {
		case tc.valid && merged == 0:
			t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) did not merge", idx)
		case !tc.valid && merged != 0:
			t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) should return 0", idx)
		default:
			r, _, _, m := DecodePPC64RotateMask(merged)
			if tc.rotate != r || tc.mask != m {
				t.Errorf("mergePPC64ClrlsldiRlwinm(Test %d) got (%d,0x%x) expected (%d,0x%x)", idx, r, m, tc.rotate, tc.mask)
			}
		}
	}
}
// TestMergePPC64SldiSrw checks which shift left/shift right word pairs
// mergePPC64SldiSrw agrees to merge, and the rotate and mask of the resulting
// RLWINM constant.
func TestMergePPC64SldiSrw(t *testing.T) {
	type mergeCase struct {
		sld    int64
		srw    int64
		valid  bool
		rotate int64
		mask   uint64
	}
	cases := []mergeCase{
		{4, 4, true, 0, 0xFFFFFFF0},
		{4, 8, true, 28, 0x0FFFFFF0},
		{0, 0, true, 0, 0xFFFFFFFF},
		{8, 4, false, 0, 0},
		{0, 32, false, 0, 0},
		{0, 31, true, 1, 0x1},
		{31, 31, true, 0, 0x80000000},
		{32, 32, false, 0, 0},
	}

	for idx, tc := range cases {
		merged := mergePPC64SldiSrw(tc.sld, tc.srw)
		switch {
		case tc.valid && merged == 0:
			t.Errorf("mergePPC64SldiSrw(Test %d) did not merge", idx)
		case !tc.valid && merged != 0:
			t.Errorf("mergePPC64SldiSrw(Test %d) should return 0", idx)
		default:
			r, _, _, m := DecodePPC64RotateMask(merged)
			if tc.rotate != r || tc.mask != m {
				t.Errorf("mergePPC64SldiSrw(Test %d) got (%d,0x%x) expected (%d,0x%x)", idx, r, m, tc.rotate, tc.mask)
			}
		}
	}
}
// TestMergePPC64AndSrwi checks which AND mask/shift right word pairs
// mergePPC64AndSrwi agrees to merge, and the rotate and mask of the resulting
// RLWINM constant.
func TestMergePPC64AndSrwi(t *testing.T) {
	type mergeCase struct {
		and    int64
		srw    int64
		valid  bool
		rotate int64
		mask   uint64
	}
	cases := []mergeCase{
		{0x000000FF, 8, true, 24, 0xFF},
		{0xF00000FF, 8, true, 24, 0xFF},
		{0x0F0000FF, 4, false, 0, 0},
		{0x00000000, 4, false, 0, 0},
		{0xF0000000, 4, false, 0, 0},
		{0xF0000000, 32, false, 0, 0},
	}

	for idx, tc := range cases {
		merged := mergePPC64AndSrwi(tc.and, tc.srw)
		switch {
		case tc.valid && merged == 0:
			t.Errorf("mergePPC64AndSrwi(Test %d) did not merge", idx)
		case !tc.valid && merged != 0:
			t.Errorf("mergePPC64AndSrwi(Test %d) should return 0", idx)
		default:
			r, _, _, m := DecodePPC64RotateMask(merged)
			if tc.rotate != r || tc.mask != m {
				t.Errorf("mergePPC64AndSrwi(Test %d) got (%d,0x%x) expected (%d,0x%x)", idx, r, m, tc.rotate, tc.mask)
			}
		}
	}
}

View file

@ -6,6 +6,8 @@
package codegen
import "math/bits"
// ------------------- //
// const rotates //
// ------------------- //
@ -166,3 +168,46 @@ func f32(x uint32) uint32 {
// amd64:"ROLL\t[$]7"
return rot32nc(x, 7)
}
// --------------------------------------- //
// Combined Rotate + Masking operations //
// --------------------------------------- //
// checkMaskedRotate32 exercises 32 bit rotates combined with an AND mask,
// with the mask applied both after and before the rotate, and with both
// constant and variable rotate amounts. The ppc64/ppc64le comments give the
// instruction pattern expected for the statement that follows them: a single
// RLWNM carrying the rotate amount (immediate or register) and the mask.
// NOTE(review): the last three cases read a[3] instead of a[i]; presumably
// this does not matter for the instruction patterns being checked - confirm.
func checkMaskedRotate32(a []uint32, r int) {
i := 0
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], 16) & 0xFF0000
i++
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]16711680, R[0-9]+"
a[i] = bits.RotateLeft32(a[i]&0xFF, 16)
i++
// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4080, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], 4) & 0xFF0
i++
// ppc64le: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
// ppc64: "RLWNM\t[$]16, R[0-9]+, [$]255, R[0-9]+"
a[i] = bits.RotateLeft32(a[i]&0xFF0000, 16)
i++
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]16711680, R[0-9]+"
a[i] = bits.RotateLeft32(a[i], r) & 0xFF0000
i++
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]65280, R[0-9]+"
a[i] = bits.RotateLeft32(a[3], r) & 0xFF00
i++
// ppc64le: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
// ppc64: "RLWNM\tR[0-9]+, R[0-9]+, [$]4293922815, R[0-9]+"
a[i] = bits.RotateLeft32(a[3], r) & 0xFFF00FFF
i++
// ppc64le: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
// ppc64: "RLWNM\t[$]4, R[0-9]+, [$]4293922815, R[0-9]+"
a[i] = bits.RotateLeft32(a[3], 4) & 0xFFF00FFF
i++
}

View file

@ -175,10 +175,10 @@ func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byt
f += tab[v&0xff]
// ppc64le:-".*AND",".*CLRLSLWI"
// ppc64:-".*AND",".*CLRLSLWI"
f += 2*uint32(uint16(d))
f += 2 * uint32(uint16(d))
// ppc64le:-".*AND",-"RLDICR",".*CLRLSLDI"
// ppc64:-".*AND",-"RLDICR",".*CLRLSLDI"
g := 2*uint64(uint32(d))
g := 2 * uint64(uint32(d))
return f, g
}
@ -186,10 +186,10 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
f := (v8 &0xF) << 2
f := (v8 & 0xF) << 2
// ppc64le:"CLRLSLWI"
// ppc64:"CLRLSLWI"
f += byte(v16)<<3
f += byte(v16) << 3
// ppc64le:-"AND","CLRLSLWI"
// ppc64:-"AND","CLRLSLWI"
g := (v16 & 0xFF) << 3
@ -207,29 +207,81 @@ func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64
i += (v64 & 0xFFFF00) << 10
// ppc64le/power9:-"SLD","EXTSWSLI"
// ppc64/power9:-"SLD","EXTSWSLI"
j := int64(x32+32)*8
j := int64(x32+32) * 8
return f, g, h, i, j
}
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
// ppc64le:-".*MOVW"
f := int32(v>>32)
f := int32(v >> 32)
// ppc64le:".*MOVW"
f += int32(v>>31)
f += int32(v >> 31)
// ppc64le:-".*MOVH"
g := int16(v>>48)
g := int16(v >> 48)
// ppc64le:".*MOVH"
g += int16(v>>30)
g += int16(v >> 30)
// ppc64le:-".*MOVH"
g += int16(f>>16)
g += int16(f >> 16)
// ppc64le:-".*MOVB"
h := int8(v>>56)
h := int8(v >> 56)
// ppc64le:".*MOVB"
h += int8(v>>28)
h += int8(v >> 28)
// ppc64le:-".*MOVB"
h += int8(f>>24)
h += int8(f >> 24)
// ppc64le:".*MOVB"
h += int8(f>>16)
return int64(h),uint64(g)
h += int8(f >> 16)
return int64(h), uint64(g)
}
// checkShiftAndMask32 exercises 32 bit shift right combined with an AND mask,
// with the mask applied both before and after the shift. The ppc64/ppc64le
// comments give the instruction pattern expected for the statement that
// follows them: a single RLWNM when some mask bits survive the shift, or a
// store of the zero register (MOVW R0) when the shift and mask leave no bits.
func checkShiftAndMask32(v []uint32) {
i := 0
// ppc64le: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
// ppc64: "RLWNM\t[$]24, R[0-9]+, [$]1044480, R[0-9]+"
v[i] = (v[i] & 0xFF00000) >> 8
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1020, R[0-9]+"
v[i] = (v[i] & 0xFF00) >> 6
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] & 0xFF) >> 8
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] & 0xF000000) >> 28
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]255, R[0-9]+"
v[i] = (v[i] >> 6) & 0xFF
i++
// ppc64le: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
// ppc64: "RLWNM\t[$]26, R[0-9]+, [$]1044480, R[0-9]+"
v[i] = (v[i] >> 6) & 0xFF000
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] >> 20) & 0xFF000
i++
// ppc64le: "MOVW\tR0"
// ppc64: "MOVW\tR0"
v[i] = (v[i] >> 24) & 0xFF00
i++
}
// checkMergedShifts32 exercises table lookups indexed by a shifted (and
// possibly masked) 32 bit value. The ppc64/ppc64le comments give the pattern
// expected for the statement that follows them: the shift right, the mask and
// the scaling of the index by the element size are folded into one RLWNM,
// with no separate CLRLSLDI or SLD emitted.
func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]1020, R[0-9]+"
a[0] = a[uint8(v>>24)]
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]2040, R[0-9]+"
b[0] = b[uint8(v>>24)]
//ppc64le: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
//ppc64: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]2040, R[0-9]+"
b[1] = b[(v>>20)&0xFF]
//ppc64le: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
//ppc64: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]1016, R[0-9]+"
b[2] = b[v>>25]
}