cmd/compile: combine more 32 bit shift and mask operations on ppc64

Combine (AND m (SRWconst x)) or (SRWconst (AND m x)) when mask m is
and the shift value produce constant which can be encoded into an
RLWINM instruction.

Combine (CLRLSLDI (SRWconst x)) if the combining of the underling rotate
masks produces a constant which can be encoded into RLWINM.

Likewise for (SLDconst (SRWconst x)) and (CLRLSDI (RLWINM x)).

Combine rotate word + and operations which can be encoded as a single
RLWINM/RLWNM instruction.

The most notable performance improvements arise from the crypto
benchmarks below (GOARCH=power8 on a ppc64le/linux):

pkg:golang.org/x/crypto/blowfish goos:linux goarch:ppc64le
ExpandKeyWithSalt                               52.2µs ± 0%    47.5µs ± 0%  -8.88%
ExpandKey                                       44.4µs ± 0%    40.3µs ± 0%  -9.15%

pkg:golang.org/x/crypto/ssh/internal/bcrypt_pbkdf goos:linux goarch:ppc64le
Key                                             57.6ms ± 0%    52.3ms ± 0%  -9.13%

pkg:golang.org/x/crypto/bcrypt goos:linux goarch:ppc64le
Equal                                           90.9ms ± 0%    82.6ms ± 0%  -9.13%
DefaultCost                                     91.0ms ± 0%    82.7ms ± 0%  -9.12%

Change-Id: I59a0ca29face38f4ab46e37124c32906f216c4ce
Reviewed-on: https://go-review.googlesource.com/c/go/+/260798
Run-TryBot: Carlos Eduardo Seo <carlos.seo@linaro.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Carlos Eduardo Seo <carlos.seo@linaro.com>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
This commit is contained in:
Paul E. Murphy 2020-10-23 12:12:34 -05:00 committed by Lynn Boger
parent e3bb53a768
commit c3c6fbf314
9 changed files with 900 additions and 21 deletions

View file

@ -1381,6 +1381,71 @@ func GetPPC64Shiftme(auxint int64) int64 {
return int64(int8(auxint))
}
// Test if this value can encoded as a mask for a rlwinm like
// operation. Masks can also extend from the msb and wrap to
// the lsb too. That is, the valid masks are 32 bit strings
// of the form: 0..01..10..0 or 1..10..01..1 or 1...1
func isPPC64WordRotateMask(v64 int64) bool {
// Isolate rightmost 1 (if none 0) and add.
v := uint32(v64)
vp := (v & -v) + v
// Likewise, for the wrapping case.
vn := ^v
vpn := (vn & -vn) + vn
return (v&vp == 0 || vn&vpn == 0) && v != 0
}
// Compress mask and and shift into single value of the form
// me | mb<<8 | rotate<<16 | nbits<<24 where me and mb can
// be used to regenerate the input mask.
func encodePPC64RotateMask(rotate, mask, nbits int64) int64 {
var mb, me, mbn, men int
// Determine boundaries and then decode them
if mask == 0 || ^mask == 0 || rotate >= nbits {
panic("Invalid PPC64 rotate mask")
} else if nbits == 32 {
mb = bits.LeadingZeros32(uint32(mask))
me = 32 - bits.TrailingZeros32(uint32(mask))
mbn = bits.LeadingZeros32(^uint32(mask))
men = 32 - bits.TrailingZeros32(^uint32(mask))
} else {
mb = bits.LeadingZeros64(uint64(mask))
me = 64 - bits.TrailingZeros64(uint64(mask))
mbn = bits.LeadingZeros64(^uint64(mask))
men = 64 - bits.TrailingZeros64(^uint64(mask))
}
// Check for a wrapping mask (e.g bits at 0 and 63)
if mb == 0 && me == int(nbits) {
// swap the inverted values
mb, me = men, mbn
}
return int64(me) | int64(mb<<8) | int64(rotate<<16) | int64(nbits<<24)
}
// The inverse operation of encodePPC64RotateMask. The values returned as
// mb and me satisfy the POWER ISA definition of MASK(x,y) where MASK(mb,me) = mask.
func DecodePPC64RotateMask(sauxint int64) (rotate, mb, me int64, mask uint64) {
auxint := uint64(sauxint)
rotate = int64((auxint >> 16) & 0xFF)
mb = int64((auxint >> 8) & 0xFF)
me = int64((auxint >> 0) & 0xFF)
nbits := int64((auxint >> 24) & 0xFF)
mask = ((1 << uint(nbits-mb)) - 1) ^ ((1 << uint(nbits-me)) - 1)
if mb > me {
mask = ^mask
}
if nbits == 32 {
mask = uint64(uint32(mask))
}
// Fixup ME to match ISA definition. The second argument to MASK(..,me)
// is inclusive.
me = (me - 1) & (nbits - 1)
return
}
// This verifies that the mask occupies the
// rightmost bits.
func isPPC64ValidShiftMask(v int64) bool {
@ -1394,6 +1459,78 @@ func getPPC64ShiftMaskLength(v int64) int64 {
return int64(bits.Len64(uint64(v)))
}
// Decompose a shift right into an equivalent rotate/mask,
// and return mask & m.
func mergePPC64RShiftMask(m, s, nbits int64) int64 {
smask := uint64((1<<uint(nbits))-1) >> uint(s)
return m & int64(smask)
}
// Combine (ANDconst [m] (SRWconst [s])) into (RLWINM [y]) or return 0
func mergePPC64AndSrwi(m, s int64) int64 {
mask := mergePPC64RShiftMask(m, s, 32)
if !isPPC64WordRotateMask(mask) {
return 0
}
return encodePPC64RotateMask(32-s, mask, 32)
}
// Test if a shift right feeding into a CLRLSLDI can be merged into RLWINM.
// Return the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiSrw(sld, srw int64) int64 {
mask_1 := uint64(0xFFFFFFFF >> uint(srw))
// for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// Rewrite mask to apply after the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(sld))
r_1 := 32 - srw
r_2 := GetPPC64Shiftsh(sld)
r_3 := (r_1 + r_2) & 31 // This can wrap.
if uint64(uint32(mask_3)) != mask_3 || mask_3 == 0 {
return 0
}
return encodePPC64RotateMask(int64(r_3), int64(mask_3), 32)
}
// Test if a RLWINM feeding into a CLRLSLDI can be merged into RLWINM. Return
// the encoded RLWINM constant, or 0 if they cannot be merged.
func mergePPC64ClrlsldiRlwinm(sld int32, rlw int64) int64 {
r_1, _, _, mask_1 := DecodePPC64RotateMask(rlw)
// for CLRLSLDI, it's more convient to think of it as a mask left bits then rotate left.
mask_2 := uint64(0xFFFFFFFFFFFFFFFF) >> uint(GetPPC64Shiftmb(int64(sld)))
// combine the masks, and adjust for the final left shift.
mask_3 := (mask_1 & mask_2) << uint(GetPPC64Shiftsh(int64(sld)))
r_2 := GetPPC64Shiftsh(int64(sld))
r_3 := (r_1 + r_2) & 31 // This can wrap.
// Verify the result is still a valid bitmask of <= 32 bits.
if !isPPC64WordRotateMask(int64(mask_3)) || uint64(uint32(mask_3)) != mask_3 {
return 0
}
return encodePPC64RotateMask(r_3, int64(mask_3), 32)
}
// Compute the encoded RLWINM constant from combining (SLDconst [sld] (SRWconst [srw] x)),
// or return 0 if they cannot be combined.
func mergePPC64SldiSrw(sld, srw int64) int64 {
if sld > srw || srw >= 32 {
return 0
}
mask_r := uint32(0xFFFFFFFF) >> uint(srw)
mask_l := uint32(0xFFFFFFFF) >> uint(sld)
mask := (mask_r & mask_l) << uint(sld)
return encodePPC64RotateMask((32-srw+sld)&31, int64(mask), 32)
}
// Convenience function to rotate a 32 bit constant value by another constant.
func rotateLeft32(v, rotate int64) int64 {
return int64(bits.RotateLeft32(uint32(v), int(rotate)))
}
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) arm64BitField {
if lsb < 0 || lsb > 63 {