go/test/codegen/divmod.go
Russ Cox 1e5bb416d8 cmd/compile: implement bits.Mul64 on 32-bit systems
This CL implements Mul64uhilo, Hmul64, Hmul64u, and Avg64u
on 32-bit systems, with the effect that constant division of both
int64s and uint64s can now be emitted directly in all cases,
and also that bits.Mul64 can be intrinsified on 32-bit systems.

Previously, constant division of uint64s by values 0 ≤ c ≤ 0xFFFF was
implemented as uint32 divisions by c and some fixup. After expanding
those smaller constant divisions, the code for i/999 required:

	(386) 7 mul, 10 add, 2 sub, 3 rotate, 3 shift (104 bytes)
	(arm) 7 mul, 9 add, 3 sub, 2 shift (104 bytes)
	(mips) 7 mul, 10 add, 5 sub, 6 shift, 3 sgtu (176 bytes)

For that much code, we might as well use a full 64x64->128 multiply
that can be used for all divisors, not just small ones.
Having done that, the same i/999 now generates:

	(386) 4 mul, 9 add, 2 sub, 2 or, 6 shift (112 bytes)
	(arm) 4 mul, 8 add, 2 sub, 2 or, 3 shift (92 bytes)
	(mips) 4 mul, 11 add, 3 sub, 6 shift, 8 sgtu, 4 or (196 bytes)

The size increase on 386 is due to a few extra register spills.
The size increase on mips is due to add-with-carry being hard.

The new approach is more general, letting us delete the old special case
and guarantee that all int64 and uint64 divisions by constants are
generated directly on 32-bit systems.

This especially speeds up code making heavy use of bits.Mul64 with
a constant argument, which happens in strconv and various crypto
packages. A few examples are benchmarked below.

pkg: cmd/compile/internal/test

benchmark \ host                      local  linux-amd64       s7  linux-386  s7:GOARCH=386
                                    vs base      vs base  vs base    vs base        vs base
DivconstI64                               ~            ~        ~    -49.66%        -21.02%
ModconstI64                               ~            ~        ~    -13.45%        +14.52%
DivisiblePow2constI64                     ~            ~        ~     +0.97%         -1.32%
DivisibleconstI64                         ~            ~        ~    -20.01%        -48.28%
DivisibleWDivconstI64                     ~            ~   -1.76%    -38.59%        -42.74%
DivconstU64/3                             ~            ~        ~    -13.82%         -4.09%
DivconstU64/5                             ~            ~        ~    -14.10%         -3.54%
DivconstU64/37                       -2.07%       -4.45%        ~    -19.60%         -9.55%
DivconstU64/1234567                       ~            ~        ~    -61.55%        -56.93%
ModconstU64                               ~            ~        ~     -6.25%              ~
DivisibleconstU64                         ~            ~        ~     -2.78%         -7.82%
DivisibleWDivconstU64                     ~            ~        ~     +4.23%         +2.56%

pkg: math/bits

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Add                       ~            ~          ~              ~
Add32                +1.59%            ~          ~              ~
Add64                     ~            ~          ~              ~
Add64multiple             ~            ~          ~              ~
Sub                       ~            ~          ~              ~
Sub32                     ~            ~          ~              ~
Sub64                     ~            ~     -9.20%              ~
Sub64multiple             ~            ~          ~              ~
Mul                       ~            ~          ~              ~
Mul32                     ~            ~          ~              ~
Mul64                     ~            ~    -41.58%        -53.21%
Div                       ~            ~          ~              ~
Div32                     ~            ~          ~              ~
Div64                     ~            ~          ~              ~

pkg: strconv

benchmark \ host                       s7  linux-amd64  linux-386  s7:GOARCH=386
                                  vs base      vs base    vs base        vs base
ParseInt/Pos/7bit                       ~            ~    -11.08%         -6.75%
ParseInt/Pos/26bit                      ~            ~    -13.65%        -11.02%
ParseInt/Pos/31bit                      ~            ~    -14.65%         -9.71%
ParseInt/Pos/56bit                 -1.80%            ~    -17.97%        -10.78%
ParseInt/Pos/63bit                      ~            ~    -13.85%         -9.63%
ParseInt/Neg/7bit                       ~            ~    -12.14%         -7.26%
ParseInt/Neg/26bit                      ~            ~    -14.18%         -9.81%
ParseInt/Neg/31bit                      ~            ~    -14.51%         -9.02%
ParseInt/Neg/56bit                      ~            ~    -15.79%         -9.79%
ParseInt/Neg/63bit                      ~            ~    -15.68%        -11.07%
AppendFloat/Decimal                     ~            ~     -7.25%        -12.26%
AppendFloat/Float                       ~            ~    -15.96%        -19.45%
AppendFloat/Exp                         ~            ~    -13.96%        -17.76%
AppendFloat/NegExp                      ~            ~    -14.89%        -20.27%
AppendFloat/LongExp                     ~            ~    -12.68%        -17.97%
AppendFloat/Big                         ~            ~    -11.10%        -16.64%
AppendFloat/BinaryExp                   ~            ~          ~              ~
AppendFloat/32Integer                   ~            ~    -10.05%        -10.91%
AppendFloat/32ExactFraction             ~            ~     -8.93%        -13.00%
AppendFloat/32Point                     ~            ~    -10.36%        -14.89%
AppendFloat/32Exp                       ~            ~     -9.88%        -13.54%
AppendFloat/32NegExp                    ~            ~    -10.16%        -14.26%
AppendFloat/32Shortest                  ~            ~    -11.39%        -14.96%
AppendFloat/32Fixed8Hard                ~            ~          ~         -2.31%
AppendFloat/32Fixed9Hard                ~            ~          ~         -7.01%
AppendFloat/64Fixed1                    ~            ~     -2.83%         -8.23%
AppendFloat/64Fixed2                    ~            ~          ~         -7.94%
AppendFloat/64Fixed3                    ~            ~     -4.07%         -7.22%
AppendFloat/64Fixed4                    ~            ~     -7.24%         -7.62%
AppendFloat/64Fixed12                   ~            ~     -6.57%         -4.82%
AppendFloat/64Fixed16                   ~            ~     -4.00%         -5.81%
AppendFloat/64Fixed12Hard          -2.22%            ~     -4.07%         -6.35%
AppendFloat/64Fixed17Hard          -2.12%            ~          ~         -3.79%
AppendFloat/64Fixed18Hard          -1.89%            ~     +2.48%              ~
AppendFloat/Slowpath64             -1.85%            ~    -14.49%        -18.21%
AppendFloat/SlowpathDenormal64          ~            ~    -13.08%        -19.41%

pkg: crypto/internal/fips140/nistec/fiat

benchmark \ host         s7  linux-amd64  linux-386  s7:GOARCH=386
                    vs base      vs base    vs base        vs base
Mul/P224                  ~            ~    -29.95%        -39.60%
Mul/P384                  ~            ~    -37.11%        -63.33%
Mul/P521                  ~            ~    -26.62%        -12.42%
Square/P224          +1.46%            ~    -40.62%        -49.18%
Square/P384               ~            ~    -45.51%        -69.68%
Square/P521         +90.37%            ~    -25.26%        -11.23%

(The +90% is a separate problem and not real; that much variation
can be seen on that system by running the same binary from two
different files.)

pkg: crypto/internal/fips140/edwards25519

benchmark \ host                    s7  linux-amd64  linux-386  s7:GOARCH=386
                               vs base      vs base    vs base        vs base
EncodingDecoding                     ~            ~    -34.67%        -35.75%
ScalarBaseMult                       ~            ~    -31.25%        -30.29%
ScalarMult                           ~            ~    -33.45%        -32.54%
VarTimeDoubleScalarBaseMult          ~            ~    -33.78%        -33.68%

Change-Id: Id3c91d42cd01def6731b755e99f8f40c6ad1bb65
Reviewed-on: https://go-review.googlesource.com/c/go/+/716061
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Russ Cox <rsc@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
2025-10-30 08:04:20 -07:00

1170 lines
25 KiB
Go

// asmcheck
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
// Div and mod rewrites, testing cmd/compile/internal/ssa/_gen/divmod.rules.
// See comments there for "Case 1" etc.
// Convert multiplication by a power of two to a shift.
// mul32_uint8 returns i*32. The checks below expect a left shift by 5
// (SHLL on 386, LSL on arm64).
func mul32_uint8(i uint8) uint8 {
// 386: "SHLL [$]5,"
// arm64: "LSL [$]5,"
return i * 32
}
// mul32_uint16 returns i*32. The checks below expect a left shift by 5
// (SHLL on 386, LSL on arm64).
func mul32_uint16(i uint16) uint16 {
// 386: "SHLL [$]5,"
// arm64: "LSL [$]5,"
return i * 32
}
// mul32_uint32 returns i*32. The checks below expect a left shift by 5
// (SHLL on 386, LSL on arm64).
func mul32_uint32(i uint32) uint32 {
// 386: "SHLL [$]5,"
// arm64: "LSL [$]5,"
return i * 32
}
// mul32_uint64 returns i*32 for a 64-bit operand. On 386 the checks expect
// SHLL by 5 together with SHRL by 27; on arm64 they expect LSL by 5.
func mul32_uint64(i uint64) uint64 {
// 386: "SHLL [$]5,"
// 386: "SHRL [$]27,"
// arm64: "LSL [$]5,"
return i * 32
}
// mulNeg32_int8 returns i*-32. On 386 the checks expect SHLL by 5 and NEGL;
// on arm64 they expect NEG with a <<5 shifted register operand.
func mulNeg32_int8(i int8) int8 {
// 386: "SHLL [$]5,"
// 386: "NEGL"
// arm64: "NEG R[0-9]+<<5,"
return i * -32
}
// mulNeg32_int16 returns i*-32. On 386 the checks expect SHLL by 5 and NEGL;
// on arm64 they expect NEG with a <<5 shifted register operand.
func mulNeg32_int16(i int16) int16 {
// 386: "SHLL [$]5,"
// 386: "NEGL"
// arm64: "NEG R[0-9]+<<5,"
return i * -32
}
// mulNeg32_int32 returns i*-32. On 386 the checks expect SHLL by 5 and NEGL;
// on arm64 they expect NEG with a <<5 shifted register operand.
func mulNeg32_int32(i int32) int32 {
// 386: "SHLL [$]5,"
// 386: "NEGL"
// arm64: "NEG R[0-9]+<<5,"
return i * -32
}
// mulNeg32_int64 returns i*-32 for a 64-bit operand. On 386 the checks
// expect SHLL by 5, SHRL by 27 and SBBL; on arm64 they expect NEG with a
// <<5 shifted register operand.
func mulNeg32_int64(i int64) int64 {
// 386: "SHLL [$]5,"
// 386: "SHRL [$]27,"
// 386: "SBBL"
// arm64: "NEG R[0-9]+<<5,"
return i * -32
}
// Signed divide by power of 2.
// div32_int8 returns i/32 for a signed 8-bit value. The checks below pin a
// shift-and-add sequence on 386 (SARB, SHRB, ADDL, SARB) and a
// bitfield-extract-and-add sequence on arm64 (SBFX, ADD, SBFX).
func div32_int8(i int8) int8 {
// 386: "SARB [$]7,"
// 386: "SHRB [$]3,"
// 386: "ADDL"
// 386: "SARB [$]5,"
// arm64: "SBFX [$]7, R[0-9]+, [$]1,"
// arm64: "ADD R[0-9]+>>3,"
// arm64: "SBFX [$]5, R[0-9]+, [$]3,"
return i / 32
}
// div32_int16 returns i/32 for a signed 16-bit value. The checks below pin a
// shift-and-add sequence on 386 (SARW, SHRW, ADDL, SARW) and a
// bitfield-extract-and-add sequence on arm64 (SBFX, ADD, SBFX).
func div32_int16(i int16) int16 {
// 386: "SARW [$]15,"
// 386: "SHRW [$]11,"
// 386: "ADDL"
// 386: "SARW [$]5,"
// arm64: "SBFX [$]15, R[0-9]+, [$]1,"
// arm64: "ADD R[0-9]+>>11,"
// arm64: "SBFX [$]5, R[0-9]+, [$]11,"
return i / 32
}
// div32_int32 returns i/32 for a signed 32-bit value. The checks below pin a
// shift-and-add sequence on 386 (SARL, SHRL, ADDL, SARL) and a
// bitfield-extract-and-add sequence on arm64 (SBFX, ADD, SBFX).
func div32_int32(i int32) int32 {
// 386: "SARL [$]31,"
// 386: "SHRL [$]27,"
// 386: "ADDL"
// 386: "SARL [$]5,"
// arm64: "SBFX [$]31, R[0-9]+, [$]1,"
// arm64: "ADD R[0-9]+>>27,"
// arm64: "SBFX [$]5, R[0-9]+, [$]27,"
return i / 32
}
// div32_int64 returns i/32 for a signed 64-bit value. On 386 the checks
// expect 32-bit shifts and an add (SARL, SHRL, ADDL, SARL, SHRL, SHLL); on
// arm64 they expect ASR by 63, ADD with a >>59 operand, and ASR by 5.
func div32_int64(i int64) int64 {
// 386: "SARL [$]31,"
// 386: "SHRL [$]27,"
// 386: "ADDL"
// 386: "SARL [$]5,"
// 386: "SHRL [$]5,"
// 386: "SHLL [$]27,"
// arm64: "ASR [$]63,"
// arm64: "ADD R[0-9]+>>59,"
// arm64: "ASR [$]5,"
return i / 32
}
// Case 1. Signed divides where 2N ≤ register size.
// div7_int8 returns i/7 for a signed 8-bit value (listed under Case 1 of
// divmod.rules). The checks expect a multiply by the constant 147, a signed
// right shift by 10, and a subtraction on both 386 and arm64.
func div7_int8(i int8) int8 {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]147,"
// 386: "SARL [$]10,"
// 386: "SUBL"
// arm64: "MOVD [$]147,"
// arm64: "MULW"
// arm64: "SBFX [$]10, R[0-9]+, [$]22,"
// arm64: "SUB R[0-9]+->31,"
return i / 7
}
// div7_int16 returns i/7 for a signed 16-bit value (listed under Case 1 of
// divmod.rules). The checks expect a multiply by the constant 37450, a
// signed right shift by 18, and a subtraction on both 386 and arm64.
func div7_int16(i int16) int16 {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]37450,"
// 386: "SARL [$]18,"
// 386: "SUBL"
// arm64: "MOVD [$]37450,"
// arm64: "MULW"
// arm64: "SBFX [$]18, R[0-9]+, [$]14,"
// arm64: "SUB R[0-9]+->31,"
return i / 7
}
// div7_int32 returns i/7 for a signed 32-bit value (listed under Case 1 of
// divmod.rules). Only arm64 is checked; it is expected to multiply by
// 2454267027, shift right arithmetically by 34, and subtract.
func div7_int32(i int32) int32 {
// 64-bit only
// arm64: "MOVD [$]2454267027,"
// arm64: "MUL "
// arm64: "ASR [$]34,"
// arm64: "SUB R[0-9]+->63,"
return i / 7
}
// Case 2. Signed divides where m is even.
// div9_int32 returns i/9 for a signed 32-bit value (listed under Case 2 of
// divmod.rules). The checks expect a multiply by a magic constant
// (1908874354 on 386, 3817748708 on arm64), an arithmetic right shift, and
// a subtraction.
func div9_int32(i int32) int32 {
// 386: "SARL [$]31,"
// 386: "MOVL [$]1908874354,"
// 386: "IMULL"
// 386: "SARL [$]2,"
// 386: "SUBL"
// arm64: "MOVD [$]3817748708,"
// arm64: "MUL "
// arm64: "ASR [$]35,"
// arm64: "SUB R[0-9]+->63,"
return i / 9
}
// div7_int64 returns i/7 for a signed 64-bit value (listed under Case 2 of
// divmod.rules). Only arm64 is checked; it is expected to use SMULH with the
// constant 5270498306774157605, ASR by 1, and a subtraction.
func div7_int64(i int64) int64 {
// 64-bit only
// arm64 MOVD $5270498306774157605, SMULH, ASR $1, SUB ->63
// arm64: "MOVD [$]5270498306774157605,"
// arm64: "SMULH"
// arm64: "ASR [$]1,"
// arm64: "SUB R[0-9]+->63,"
return i / 7
}
// Case 3. Signed divides where m is odd.
// div3_int32 returns i/3 for a signed 32-bit value (listed under Case 3 of
// divmod.rules). The checks expect a multiply by a magic constant
// (-1431655765 on 386, 2863311531 on arm64), an arithmetic right shift, and
// a subtraction.
func div3_int32(i int32) int32 {
// 386: "SARL [$]31,"
// 386: "MOVL [$]-1431655765,"
// 386: "IMULL"
// 386: "SARL [$]1,"
// 386: "SUBL"
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL"
// arm64: "ASR [$]33,"
// arm64: "SUB R[0-9]+->63,"
return i / 3
}
// div3_int64 returns i/3 for a signed 64-bit value (listed under Case 3 of
// divmod.rules). Only arm64 is checked; it is expected to use SMULH with the
// constant -6148914691236517205, an ADD, ASR by 1, and a subtraction.
func div3_int64(i int64) int64 {
// 64-bit only
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH"
// arm64: "ADD"
// arm64: "ASR [$]1,"
// arm64: "SUB R[0-9]+->63,"
return i / 3
}
// Case 4. Unsigned divide where x < 1<<(N-1).
// div7_int16u returns 0 for negative i and i/7 otherwise (listed under
// Case 4 of divmod.rules). The directives sit directly above the final
// return, after the negative-input guard. They expect a multiply by 37450
// and a right shift by 18, and they require that no subtract instruction
// appears (SUBL on 386, SUB on arm64).
func div7_int16u(i int16) int16 {
if i < 0 {
return 0
}
// 386: "IMUL3L [$]37450,"
// 386: "SHRL [$]18,"
// 386: -"SUBL"
// arm64: "MOVD [$]37450,"
// arm64: "MULW"
// arm64: "UBFX [$]18, R[0-9]+, [$]14,"
// arm64: -"SUB"
return i / 7
}
// div7_int32u returns 0 for negative i and i/7 otherwise (listed under
// Case 4 of divmod.rules). The directives sit directly above the final
// return, after the negative-input guard. They expect a multiply by a magic
// constant and a logical right shift, and they require that no subtract
// instruction appears (SUBL on 386, SUB on arm64).
func div7_int32u(i int32) int32 {
if i < 0 {
return 0
}
// 386: "MOVL [$]-1840700269,"
// 386: "MULL"
// 386: "SHRL [$]2"
// 386: -"SUBL"
// arm64: "MOVD [$]2454267027,"
// arm64: "MUL"
// arm64: "LSR [$]34,"
// arm64: -"SUB"
return i / 7
}
// div7_int64u returns 0 for negative i and i/7 otherwise (listed under
// Case 4 of divmod.rules). Only arm64 is checked; the directives sit
// directly above the final return, after the negative-input guard. They
// expect UMULH with the constant -7905747460161236406 and LSR by 2, and
// they require that no SUB appears.
func div7_int64u(i int64) int64 {
// 64-bit only
if i < 0 {
return 0
}
// arm64: "MOVD [$]-7905747460161236406,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
// arm64: -"SUB"
return i / 7
}
// Case 5. Unsigned divide where 2N+1 ≤ register size.
// div7_uint8 returns i/7 for an unsigned 8-bit value (listed under Case 5 of
// divmod.rules). The checks expect a multiply by the constant 293 and a
// right shift by 11 (SHRL on 386, UBFX on arm64).
func div7_uint8(i uint8) uint8 {
// 386: "IMUL3L [$]293,"
// 386: "SHRL [$]11,"
// arm64: "MOVD [$]293,"
// arm64: "MULW"
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
return i / 7
}
// div7_uint16 returns i/7 for an unsigned 16-bit value (listed under Case 5
// of divmod.rules). Only arm64 is checked; it is expected to multiply by
// 74899 and shift right logically by 19.
func div7_uint16(i uint16) uint16 {
// only 64-bit
// arm64: "MOVD [$]74899,"
// arm64: "MUL"
// arm64: "LSR [$]19,"
return i / 7
}
// Case 6. Unsigned divide where m is even.
// div3_uint16 returns i/3 for an unsigned 16-bit value (listed under Case 6
// of divmod.rules). On 386 the directive line lists a multiply by 43691 and
// a right shift by 17; on arm64 the checks expect a multiply by 87382 and
// LSR by 18.
func div3_uint16(i uint16) uint16 {
// 386: "IMUL3L [$]43691," "SHRL [$]17,"
// arm64: "MOVD [$]87382,"
// arm64: "MUL"
// arm64: "LSR [$]18,"
return i / 3
}
// div3_uint32 returns i/3 for an unsigned 32-bit value (listed under Case 6
// of divmod.rules). The checks expect a multiply by a magic constant
// (-1431655765 on 386, 2863311531 on arm64) followed by a logical right
// shift. Each expected 386 instruction has its own directive line so that
// none of them depends on how separators between patterns are parsed.
func div3_uint32(i uint32) uint32 {
	// 386: "MOVL [$]-1431655765,"
	// 386: "MULL"
	// 386: "SHRL [$]1,"
	// arm64: "MOVD [$]2863311531,"
	// arm64: "MUL"
	// arm64: "LSR [$]33,"
	return i / 3
}
// div3_uint64 returns i/3 for an unsigned 64-bit value (listed under Case 6
// of divmod.rules). On 386 the checks expect a multiply by the constant
// -1431655766 and a right shift by 1, and they require that no CALL is
// emitted. On arm64 they expect UMULH with -6148914691236517205 and LSR
// by 1.
func div3_uint64(i uint64) uint64 {
	// 386: "MOVL [$]-1431655766"
	// 386: "MULL"
	// 386: "SHRL [$]1"
	// 386: -".*CALL"
	// arm64: "MOVD [$]-6148914691236517205,"
	// arm64: "UMULH"
	// arm64: "LSR [$]1,"
	return i / 3
}
// Case 7. Unsigned divide where c is even.
// div14_uint16 returns i/14 for an unsigned 16-bit value (listed under
// Case 7 of divmod.rules). Only 386 is checked; it is expected to shift
// right by 1, multiply by 37450, and shift right by 18.
func div14_uint16(i uint16) uint16 {
// 32-bit only
// 386: "SHRL [$]1,"
// 386: "IMUL3L [$]37450,"
// 386: "SHRL [$]18,"
return i / 14
}
// div14_uint32 returns i/14 for an unsigned 32-bit value (listed under
// Case 7 of divmod.rules). The checks expect a right shift by 1 (SHRL on
// 386, UBFX on arm64), a magic constant load, and a final right shift;
// arm64 additionally checks for MUL.
func div14_uint32(i uint32) uint32 {
// 386: "SHRL [$]1,"
// 386: "MOVL [$]-1840700269,"
// 386: "SHRL [$]2,"
// arm64: "UBFX [$]1, R[0-9]+, [$]31,"
// arm64: "MOVD [$]2454267027,"
// arm64: "MUL"
// arm64: "LSR [$]34,"
return i / 14
}
// div14_uint64 returns i/14 for an unsigned 64-bit value (listed under
// Case 7 of divmod.rules). On 386 the checks expect a multiply by the
// constant -1840700270 and a right shift by 2, and they require that no
// CALL is emitted. On arm64 they expect UMULH with -7905747460161236406 and
// LSR by 2.
func div14_uint64(i uint64) uint64 {
// 386: "MOVL [$]-1840700270,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: -".*CALL"
// arm64: "MOVD [$]-7905747460161236406,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
return i / 14
}
// Case 8. Unsigned divide on systems with avg.
// div7_uint16a returns i/7 for an unsigned 16-bit value (listed under
// Case 8 of divmod.rules). Only 386 is checked; it is expected to use SHLL
// by 16, a multiply by 9363, ADDL, RCRL by 1, and SHRL by 18.
func div7_uint16a(i uint16) uint16 {
// only 32-bit
// 386: "SHLL [$]16,"
// 386: "IMUL3L [$]9363,"
// 386: "ADDL"
// 386: "RCRL [$]1,"
// 386: "SHRL [$]18,"
return i / 7
}
// div7_uint32 returns i/7 for an unsigned 32-bit value (listed under Case 8
// of divmod.rules). Both architectures are expected to multiply by
// 613566757. 386 is then expected to use ADDL, RCRL by 1 and SHRL by 2;
// arm64 is expected to use UBFIZ, SUB, ADD with a >>1 operand, and LSR
// by 34.
func div7_uint32(i uint32) uint32 {
// 386: "MOVL [$]613566757,"
// 386: "MULL"
// 386: "ADDL"
// 386: "RCRL [$]1,"
// 386: "SHRL [$]2,"
// arm64: "UBFIZ [$]32, R[0-9]+, [$]32,"
// arm64: "MOVD [$]613566757,"
// arm64: "MUL"
// arm64: "SUB"
// arm64: "ADD R[0-9]+>>1,"
// arm64: "LSR [$]34,"
return i / 7
}
// div7_uint64 returns i/7 for an unsigned 64-bit value. On 386 the checks
// expect a multiply by the constant -1840700269 and a right shift by 2, and
// they require that no CALL is emitted. On arm64 they expect UMULH with
// 2635249153387078803, SUB, ADD with a >>1 operand, and LSR by 2.
func div7_uint64(i uint64) uint64 {
	// 386: "MOVL [$]-1840700269,"
	// 386: "MULL"
	// 386: "SHRL [$]2,"
	// 386: -".*CALL"
	// arm64: "MOVD [$]2635249153387078803,"
	// arm64: "UMULH"
	// arm64: "SUB"
	// arm64: "ADD R[0-9]+>>1,"
	// arm64: "LSR [$]2,"
	return i / 7
}
// div12345_uint64 returns i/12345 for an unsigned 64-bit value. On 386 the
// checks expect two 32-bit constant loads (-1444876402 and 835683390), MULL,
// SHRL by 13 and SHLL by 19; on arm64 they expect UMULH with
// -6205696892516465602 and LSR by 13.
func div12345_uint64(i uint64) uint64 {
// 386: "MOVL [$]-1444876402,"
// 386: "MOVL [$]835683390,"
// 386: "MULL"
// 386: "SHRL [$]13,"
// 386: "SHLL [$]19,"
// arm64: "MOVD [$]-6205696892516465602,"
// arm64: "UMULH"
// arm64: "LSR [$]13,"
return i / 12345
}
// Divisibility and non-divisibility by power of two.
// divis32_uint8 reports whether i is a multiple of 32. The checks expect a
// bit test against the mask 31 (TESTB on 386, TSTW on arm64).
func divis32_uint8(i uint8) bool {
// 386: "TESTB [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_uint8 reports whether i is not a multiple of 32. The checks
// expect a bit test against the mask 31 (TESTB on 386, TSTW on arm64).
func ndivis32_uint8(i uint8) bool {
// 386: "TESTB [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_uint16 reports whether i is a multiple of 32. The checks expect a
// bit test against the mask 31 (TESTW on 386, TSTW on arm64).
func divis32_uint16(i uint16) bool {
// 386: "TESTW [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_uint16 reports whether i is not a multiple of 32. The checks
// expect a bit test against the mask 31 (TESTW on 386, TSTW on arm64).
func ndivis32_uint16(i uint16) bool {
// 386: "TESTW [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_uint32 reports whether i is a multiple of 32. The checks expect a
// bit test against the mask 31 (TESTL on 386, TSTW on arm64).
func divis32_uint32(i uint32) bool {
// 386: "TESTL [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_uint32 reports whether i is not a multiple of 32. The checks
// expect a bit test against the mask 31 (TESTL on 386, TSTW on arm64).
func ndivis32_uint32(i uint32) bool {
// 386: "TESTL [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_uint64 reports whether i is a multiple of 32. The checks expect a
// bit test against the mask 31 (TESTL on 386, TST on arm64).
func divis32_uint64(i uint64) bool {
// 386: "TESTL [$]31,"
// arm64: "TST [$]31,"
return i%32 == 0
}
// ndivis32_uint64 reports whether i is not a multiple of 32. The checks
// expect a bit test against the mask 31 (TESTL on 386, TST on arm64).
func ndivis32_uint64(i uint64) bool {
// 386: "TESTL [$]31,"
// arm64: "TST [$]31,"
return i%32 != 0
}
// divis32_int8 reports whether the signed value i is a multiple of 32. The
// checks expect a bit test against the mask 31 (TESTB on 386, TSTW on
// arm64).
func divis32_int8(i int8) bool {
// 386: "TESTB [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_int8 reports whether the signed value i is not a multiple of 32.
// The checks expect a bit test against the mask 31 (TESTB on 386, TSTW on
// arm64).
func ndivis32_int8(i int8) bool {
// 386: "TESTB [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_int16 reports whether the signed value i is a multiple of 32. The
// checks expect a bit test against the mask 31 (TESTW on 386, TSTW on
// arm64).
func divis32_int16(i int16) bool {
// 386: "TESTW [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_int16 reports whether the signed value i is not a multiple of 32.
// The checks expect a bit test against the mask 31 (TESTW on 386, TSTW on
// arm64).
func ndivis32_int16(i int16) bool {
// 386: "TESTW [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_int32 reports whether the signed value i is a multiple of 32. The
// checks expect a bit test against the mask 31 (TESTL on 386, TSTW on
// arm64).
func divis32_int32(i int32) bool {
// 386: "TESTL [$]31,"
// arm64: "TSTW [$]31,"
return i%32 == 0
}
// ndivis32_int32 reports whether the signed value i is not a multiple of 32.
// The checks expect a bit test against the mask 31 (TESTL on 386, TSTW on
// arm64).
func ndivis32_int32(i int32) bool {
// 386: "TESTL [$]31,"
// arm64: "TSTW [$]31,"
return i%32 != 0
}
// divis32_int64 reports whether the signed value i is a multiple of 32. The
// checks expect a bit test against the mask 31 (TESTL on 386, TST on arm64).
func divis32_int64(i int64) bool {
// 386: "TESTL [$]31,"
// arm64: "TST [$]31,"
return i%32 == 0
}
// ndivis32_int64 reports whether the signed value i is not a multiple of 32.
// The checks expect a bit test against the mask 31 (TESTL on 386, TST on
// arm64).
func ndivis32_int64(i int64) bool {
// 386: "TESTL [$]31,"
// arm64: "TST [$]31,"
return i%32 != 0
}
// Divide with divisibility check; reuse divide intermediate mod.
// div_divis32_uint8 returns i/32 and whether i is a multiple of 32. The
// checks expect a right shift by 5 (SHRB on 386, UBFX on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_uint8(i uint8) (uint8, bool) {
	// 386: "SHRB [$]5,"
	// 386: "TESTB [$]31,"
	// 386: "SETEQ"
	// arm64: "UBFX [$]5, R[0-9]+, [$]3"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_uint8 returns i/32 and whether i is not a multiple of 32.
// The checks expect a right shift by 5 (SHRB on 386, UBFX on arm64), a bit
// test against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_uint8(i uint8) (uint8, bool) {
	// 386: "SHRB [$]5,"
	// 386: "TESTB [$]31,"
	// 386: "SETNE"
	// arm64: "UBFX [$]5, R[0-9]+, [$]3"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_uint16 returns i/32 and whether i is a multiple of 32. The
// checks expect a right shift by 5 (SHRW on 386, UBFX on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_uint16(i uint16) (uint16, bool) {
	// 386: "SHRW [$]5,"
	// 386: "TESTW [$]31,"
	// 386: "SETEQ"
	// arm64: "UBFX [$]5, R[0-9]+, [$]11"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_uint16 returns i/32 and whether i is not a multiple of 32.
// The checks expect a right shift by 5 (SHRW on 386, UBFX on arm64), a bit
// test against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_uint16(i uint16) (uint16, bool) {
	// 386: "SHRW [$]5,"
	// 386: "TESTW [$]31,"
	// 386: "SETNE"
	// arm64: "UBFX [$]5, R[0-9]+, [$]11,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_uint32 returns i/32 and whether i is a multiple of 32. The
// checks expect a right shift by 5 (SHRL on 386, UBFX on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_uint32(i uint32) (uint32, bool) {
	// 386: "SHRL [$]5,"
	// 386: "TESTL [$]31,"
	// 386: "SETEQ"
	// arm64: "UBFX [$]5, R[0-9]+, [$]27,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_uint32 returns i/32 and whether i is not a multiple of 32.
// The checks expect a right shift by 5 (SHRL on 386, UBFX on arm64), a bit
// test against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_uint32(i uint32) (uint32, bool) {
	// 386: "SHRL [$]5,"
	// 386: "TESTL [$]31,"
	// 386: "SETNE"
	// arm64: "UBFX [$]5, R[0-9]+, [$]27,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_uint64 returns i/32 and whether i is a multiple of 32. On 386
// the checks expect SHRL by 5 and SHLL by 27, a bit test against 31, and
// SETEQ; on arm64 they expect LSR by 5, TST against 31, and CSET EQ.
func div_divis32_uint64(i uint64) (uint64, bool) {
	// 386: "SHRL [$]5,"
	// 386: "SHLL [$]27,"
	// 386: "TESTL [$]31,"
	// 386: "SETEQ"
	// arm64: "LSR [$]5,"
	// arm64: "TST [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_uint64 returns i/32 and whether i is not a multiple of 32.
// On 386 the checks expect SHRL by 5 and SHLL by 27, a bit test against 31,
// and SETNE; on arm64 they expect LSR by 5, TST against 31, and CSET NE.
func div_ndivis32_uint64(i uint64) (uint64, bool) {
	// 386: "SHRL [$]5,"
	// 386: "SHLL [$]27,"
	// 386: "TESTL [$]31,"
	// 386: "SETNE"
	// arm64: "LSR [$]5,"
	// arm64: "TST [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_int8 returns i/32 and whether the signed value i is a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARB and SHRB on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_int8(i int8) (int8, bool) {
	// 386: "SARB [$]7,"
	// 386: "SHRB [$]3,"
	// 386: "SARB [$]5,"
	// 386: "TESTB [$]31,"
	// 386: "SETEQ"
	// arm64: "SBFX [$]7, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>3,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]3,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_int8 returns i/32 and whether the signed value i is not a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARB and SHRB on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_int8(i int8) (int8, bool) {
	// 386: "SARB [$]7,"
	// 386: "SHRB [$]3,"
	// 386: "SARB [$]5,"
	// 386: "TESTB [$]31,"
	// 386: "SETNE"
	// arm64: "SBFX [$]7, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>3,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]3,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_int16 returns i/32 and whether the signed value i is a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARW and SHRW on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_int16(i int16) (int16, bool) {
	// 386: "SARW [$]15,"
	// 386: "SHRW [$]11,"
	// 386: "SARW [$]5,"
	// 386: "TESTW [$]31,"
	// 386: "SETEQ"
	// arm64: "SBFX [$]15, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>11,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]11,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_int16 returns i/32 and whether the signed value i is not a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARW and SHRW on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_int16(i int16) (int16, bool) {
	// 386: "SARW [$]15,"
	// 386: "SHRW [$]11,"
	// 386: "SARW [$]5,"
	// 386: "TESTW [$]31,"
	// 386: "SETNE"
	// arm64: "SBFX [$]15, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>11,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]11,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_int32 returns i/32 and whether the signed value i is a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARL and SHRL on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETEQ (386) or CSET EQ (arm64).
func div_divis32_int32(i int32) (int32, bool) {
	// 386: "SARL [$]31,"
	// 386: "SHRL [$]27,"
	// 386: "SARL [$]5,"
	// 386: "TESTL [$]31,"
	// 386: "SETEQ"
	// arm64: "SBFX [$]31, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>27,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]27,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_int32 returns i/32 and whether the signed value i is not a
// multiple of 32. The checks expect the signed shift sequence for the
// quotient (SARL and SHRL on 386; SBFX and ADD on arm64), a bit test
// against 31, and SETNE (386) or CSET NE (arm64).
func div_ndivis32_int32(i int32) (int32, bool) {
	// 386: "SARL [$]31,"
	// 386: "SHRL [$]27,"
	// 386: "SARL [$]5,"
	// 386: "TESTL [$]31,"
	// 386: "SETNE"
	// arm64: "SBFX [$]31, R[0-9]+, [$]1,"
	// arm64: "ADD R[0-9]+>>27,"
	// arm64: "SBFX [$]5, R[0-9]+, [$]27,"
	// arm64: "TSTW [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// div_divis32_int64 returns i/32 and whether the signed value i is a
// multiple of 32. On 386 the checks expect SARL, SHRL, SARL and SHLL shifts,
// a bit test against 31, and SETEQ; on arm64 they expect ASR by 63, ADD
// with a >>59 operand, ASR by 5, TST against 31, and CSET EQ.
func div_divis32_int64(i int64) (int64, bool) {
	// 386: "SARL [$]31,"
	// 386: "SHRL [$]27,"
	// 386: "SARL [$]5,"
	// 386: "SHLL [$]27,"
	// 386: "TESTL [$]31,"
	// 386: "SETEQ"
	// arm64: "ASR [$]63,"
	// arm64: "ADD R[0-9]+>>59,"
	// arm64: "ASR [$]5,"
	// arm64: "TST [$]31,"
	// arm64: "CSET EQ"
	return i/32, i%32 == 0
}
// div_ndivis32_int64 returns i/32 and whether the signed value i is not a
// multiple of 32. On 386 the checks expect SARL, SHRL, SARL and SHLL shifts,
// a bit test against 31, and SETNE; on arm64 they expect ASR by 63, ADD
// with a >>59 operand, ASR by 5, TST against 31, and CSET NE.
func div_ndivis32_int64(i int64) (int64, bool) {
	// 386: "SARL [$]31,"
	// 386: "SHRL [$]27,"
	// 386: "SARL [$]5,"
	// 386: "SHLL [$]27,"
	// 386: "TESTL [$]31,"
	// 386: "SETNE"
	// arm64: "ASR [$]63,"
	// arm64: "ADD R[0-9]+>>59,"
	// arm64: "ASR [$]5,"
	// arm64: "TST [$]31,"
	// arm64: "CSET NE"
	return i/32, i%32 != 0
}
// Divisibility and non-divisibility by non-power-of-two.
// divis6_uint8 reports whether i is a multiple of 6. The checks expect a
// multiply by -85, a rotate on 386 (UBFX plus ORR on arm64), a compare
// against 42, and SETLS (386) or CSET LS (arm64).
func divis6_uint8(i uint8) bool {
// 386: "IMUL3L [$]-85,"
// 386: "ROLB [$]7,"
// 386: "CMPB .*, [$]42"
// 386: "SETLS"
// arm64: "MOVD [$]-85,"
// arm64: "MULW"
// arm64: "UBFX [$]1, R[0-9]+, [$]7,"
// arm64: "ORR R[0-9]+<<7"
// arm64: "CMPW [$]42,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_uint8 reports whether i is not a multiple of 6. The checks expect
// a multiply by -85, a rotate on 386 (UBFX plus ORR on arm64), a compare
// against 42, and SETHI (386) or CSET HI (arm64).
func ndivis6_uint8(i uint8) bool {
// 386: "IMUL3L [$]-85,"
// 386: "ROLB [$]7,"
// 386: "CMPB .*, [$]42"
// 386: "SETHI"
// arm64: "MOVD [$]-85,"
// arm64: "MULW"
// arm64: "UBFX [$]1, R[0-9]+, [$]7,"
// arm64: "ORR R[0-9]+<<7"
// arm64: "CMPW [$]42,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_uint16 reports whether i is a multiple of 6. The checks expect a
// multiply by -21845, a rotate (ROLW on 386; ORR plus RORW on arm64), the
// constant 10922, and SETLS (386) or CSET LS (arm64).
func divis6_uint16(i uint16) bool {
// 386: "IMUL3L [$]-21845,"
// 386: "ROLW [$]15,"
// 386: "CMPW .*, [$]10922"
// 386: "SETLS"
// arm64: "MOVD [$]-21845,"
// arm64: "MULW"
// arm64: "ORR R[0-9]+<<16"
// arm64: "RORW [$]17,"
// arm64: "MOVD [$]10922,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_uint16 reports whether i is not a multiple of 6. The checks expect
// a multiply by -21845, a rotate (ROLW on 386; ORR plus RORW on arm64), the
// constant 10922, and SETHI (386) or CSET HI (arm64).
func ndivis6_uint16(i uint16) bool {
// 386: "IMUL3L [$]-21845,"
// 386: "ROLW [$]15,"
// 386: "CMPW .*, [$]10922"
// 386: "SETHI"
// arm64: "MOVD [$]-21845,"
// arm64: "MULW"
// arm64: "ORR R[0-9]+<<16"
// arm64: "RORW [$]17,"
// arm64: "MOVD [$]10922,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_uint32 reports whether i is a multiple of 6. The checks expect a
// multiply by -1431655765, a rotate (ROLL by 31 on 386, RORW by 1 on
// arm64), the constant 715827882, and SETLS (386) or CSET LS (arm64).
func divis6_uint32(i uint32) bool {
// 386: "IMUL3L [$]-1431655765,"
// 386: "ROLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETLS"
// arm64: "MOVD [$]-1431655765,"
// arm64: "MULW"
// arm64: "RORW [$]1,"
// arm64: "MOVD [$]715827882,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_uint32 reports whether i is not a multiple of 6. The checks expect
// a multiply by -1431655765, a rotate (ROLL by 31 on 386, RORW by 1 on
// arm64), the constant 715827882, and SETHI (386) or CSET HI (arm64).
func ndivis6_uint32(i uint32) bool {
// 386: "IMUL3L [$]-1431655765,"
// 386: "ROLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETHI"
// arm64: "MOVD [$]-1431655765,"
// arm64: "MULW"
// arm64: "RORW [$]1,"
// arm64: "MOVD [$]715827882,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_uint64 reports whether i is a multiple of 6. On 386 the checks
// expect multiplies by -1431655766 and -1431655765, MULL, SHRL by 1, SHLL by
// 31, a compare against 715827882, and SETLS. On arm64 they expect a MUL by
// -6148914691236517205, ROR by 1, the constant 3074457345618258602, and
// CSET LS.
func divis6_uint64(i uint64) bool {
// 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]1,"
// 386: "SHLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETLS"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "ROR [$]1,"
// arm64: "MOVD [$]3074457345618258602,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_uint64 reports whether i is not a multiple of 6. On 386 the checks
// expect multiplies by -1431655766 and -1431655765, MULL, SHRL by 1, SHLL by
// 31, a compare against 715827882, and SETHI. On arm64 they expect a MUL by
// -6148914691236517205, ROR by 1, the constant 3074457345618258602, and
// CSET HI.
func ndivis6_uint64(i uint64) bool {
// 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]1,"
// 386: "SHLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETHI"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "ROR [$]1,"
// arm64: "MOVD [$]3074457345618258602,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_int8 reports whether the signed value i is a multiple of 6. The
// checks expect a multiply by -85, an add of 42, a rotate on 386 (UBFX plus
// ORR on arm64), a compare against 42, and SETLS (386) or CSET LS (arm64).
func divis6_int8(i int8) bool {
// 386: "IMUL3L [$]-85,"
// 386: "ADDL [$]42,"
// 386: "ROLB [$]7,"
// 386: "CMPB .*, [$]42"
// 386: "SETLS"
// arm64: "MOVD [$]-85,"
// arm64: "MULW"
// arm64: "ADD [$]42,"
// arm64: "UBFX [$]1, R[0-9]+, [$]7,"
// arm64: "ORR R[0-9]+<<7"
// arm64: "CMPW [$]42,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_int8 reports whether the signed value i is not a multiple of 6.
// The checks expect a multiply by -85, an add of 42, a rotate on 386 (UBFX
// plus ORR on arm64), a compare against 42, and SETHI (386) or CSET HI
// (arm64).
func ndivis6_int8(i int8) bool {
// 386: "IMUL3L [$]-85,"
// 386: "ADDL [$]42,"
// 386: "ROLB [$]7,"
// 386: "CMPB .*, [$]42"
// 386: "SETHI"
// arm64: "MOVD [$]-85,"
// arm64: "MULW"
// arm64: "ADD [$]42,"
// arm64: "UBFX [$]1, R[0-9]+, [$]7,"
// arm64: "ORR R[0-9]+<<7"
// arm64: "CMPW [$]42,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_int16 reports whether the signed value i is a multiple of 6. The
// checks expect a multiply by -21845, an add involving the constant 10922,
// a rotate (ROLW on 386; ORR plus RORW on arm64), and SETLS (386) or
// CSET LS (arm64).
func divis6_int16(i int16) bool {
// 386: "IMUL3L [$]-21845,"
// 386: "ADDL [$]10922,"
// 386: "ROLW [$]15,"
// 386: "CMPW .*, [$]10922"
// 386: "SETLS"
// arm64: "MOVD [$]-21845,"
// arm64: "MULW"
// arm64: "MOVD [$]10922,"
// arm64: "ADD "
// arm64: "ORR R[0-9]+<<16"
// arm64: "RORW [$]17,"
// arm64: "MOVD [$]10922,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_int16 reports whether the signed value i is not a multiple of 6.
// The checks expect a multiply by -21845, an add involving the constant
// 10922, a rotate (ROLW on 386; ORR plus RORW on arm64), and SETHI (386) or
// CSET HI (arm64).
func ndivis6_int16(i int16) bool {
// 386: "IMUL3L [$]-21845,"
// 386: "ADDL [$]10922,"
// 386: "ROLW [$]15,"
// 386: "CMPW .*, [$]10922"
// 386: "SETHI"
// arm64: "MOVD [$]-21845,"
// arm64: "MULW"
// arm64: "MOVD [$]10922,"
// arm64: "ADD "
// arm64: "ORR R[0-9]+<<16"
// arm64: "RORW [$]17,"
// arm64: "MOVD [$]10922,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_int32 reports whether the signed value i is a multiple of 6. The
// checks expect a multiply by -1431655765, an add involving the constant
// 715827882, a rotate (ROLL by 31 on 386, RORW by 1 on arm64), and SETLS
// (386) or CSET LS (arm64).
func divis6_int32(i int32) bool {
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADDL [$]715827882,"
// 386: "ROLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETLS"
// arm64: "MOVD [$]-1431655765,"
// arm64: "MULW"
// arm64: "MOVD [$]715827882,"
// arm64: "ADD "
// arm64: "RORW [$]1,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_int32 reports whether the signed value i is not a multiple of 6.
// The checks expect a multiply by -1431655765, an add involving the
// constant 715827882, a rotate (ROLL by 31 on 386, RORW by 1 on arm64), and
// SETHI (386) or CSET HI (arm64).
func ndivis6_int32(i int32) bool {
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADDL [$]715827882,"
// 386: "ROLL [$]31,"
// 386: "CMPL .*, [$]715827882"
// 386: "SETHI"
// arm64: "MOVD [$]-1431655765,"
// arm64: "MULW"
// arm64: "MOVD [$]715827882,"
// arm64: "ADD "
// arm64: "RORW [$]1,"
// arm64: "CSET HI"
return i%6 != 0
}
// divis6_int64 reports whether the signed value i is a multiple of 6. On
// 386 the checks expect multiplies by -1431655766 and -1431655765, ADCL of
// 715827882, compares against 715827882 and -1431655766, and SETLS. On
// arm64 they expect a MUL by -6148914691236517205, an ADD involving
// 3074457345618258602, ROR by 1, and CSET LS.
func divis6_int64(i int64) bool {
// 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADCL [$]715827882,"
// 386: "CMPL .*, [$]715827882"
// 386: "CMPL .*, [$]-1431655766"
// 386: "SETLS"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602,"
// arm64: "ADD "
// arm64: "ROR [$]1,"
// arm64: "CSET LS"
return i%6 == 0
}
// ndivis6_int64 reports whether the signed value i is not a multiple of 6.
// On 386 the checks expect multiplies by -1431655766 and -1431655765, ADCL
// of 715827882, compares against 715827882 and -1431655766, and SETHI. On
// arm64 they expect a MUL by -6148914691236517205, an ADD involving
// 3074457345618258602, ROR by 1, and CSET HI.
func ndivis6_int64(i int64) bool {
// 386: "IMUL3L [$]-1431655766,"
// 386: "IMUL3L [$]-1431655765,"
// 386: "ADCL [$]715827882,"
// 386: "CMPL .*, [$]715827882"
// 386: "CMPL .*, [$]-1431655766"
// 386: "SETHI"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "MUL "
// arm64: "MOVD [$]3074457345618258602,"
// arm64: "ADD "
// arm64: "ROR [$]1,"
// arm64: "CSET HI"
return i%6 != 0
}
// div_divis6_uint8 returns i/6 and whether i is a multiple of 6. The checks
// expect a multiply by 342, a right shift by 11, and SETEQ (386) or CSET EQ
// (arm64), and they require that no rotate instruction (ROR or ROL) appears.
func div_divis6_uint8(i uint8) (uint8, bool) {
// 386: "IMUL3L [$]342,"
// 386: "SHRL [$]11,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]342,"
// arm64: "MULW"
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_uint8 returns i/6 and whether i is not a multiple of 6. The
// checks expect a multiply by 342, a right shift by 11, and SETNE (386) or
// CSET NE (arm64), and they require that no rotate instruction (ROR or ROL)
// appears.
func div_ndivis6_uint8(i uint8) (uint8, bool) {
// 386: "IMUL3L [$]342,"
// 386: "SHRL [$]11,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]342,"
// arm64: "MULW"
// arm64: "UBFX [$]11, R[0-9]+, [$]21,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_uint16 returns i/6 and whether i is a multiple of 6. The checks
// expect a multiply by a magic constant (43691 on 386, 87382 on arm64),
// shifts, and SETEQ (386) or CSET EQ (arm64), and they require that no
// rotate instruction (ROR or ROL) appears.
func div_divis6_uint16(i uint16) (uint16, bool) {
// 386: "IMUL3L [$]43691,"
// 386: "SHRL [$]18,"
// 386: "SHLL [$]1,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]87382,"
// arm64: "MUL "
// arm64: "LSR [$]19,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_uint16 returns i/6 and whether i is not a multiple of 6. The
// checks expect a multiply by a magic constant (43691 on 386, 87382 on
// arm64), shifts, and SETNE (386) or CSET NE (arm64), and they require that
// no rotate instruction (ROR or ROL) appears.
func div_ndivis6_uint16(i uint16) (uint16, bool) {
// 386: "IMUL3L [$]43691,"
// 386: "SHRL [$]18,"
// 386: "SHLL [$]1,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]87382,"
// arm64: "MUL "
// arm64: "LSR [$]19,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_uint32 returns i/6 and whether i is a multiple of 6. The checks
// expect a magic constant load (-1431655765 on 386, 2863311531 on arm64),
// shifts, and SETEQ (386) or CSET EQ (arm64), and they require that no
// rotate instruction (ROR or ROL) appears.
func div_divis6_uint32(i uint32) (uint32, bool) {
// 386: "MOVL [$]-1431655765,"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]1,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL "
// arm64: "LSR [$]34,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_uint32 returns i/6 and whether i is not a multiple of 6. The
// checks expect a magic constant load (-1431655765 on 386, 2863311531 on
// arm64), shifts, and SETNE (386) or CSET NE (arm64), and they require that
// no rotate instruction (ROR or ROL) appears.
func div_ndivis6_uint32(i uint32) (uint32, bool) {
// 386: "MOVL [$]-1431655765,"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]1,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL "
// arm64: "LSR [$]34,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_uint64 returns i/6 and whether i is a multiple of 6. On 386 the
// checks expect constant loads of -1431655766 and -1431655765, MULL, SHRL by
// 2, SHLL by 30 and SETEQ, and they require that neither a CALL nor a rotate
// appears. On arm64 they expect UMULH with -6148914691236517205, LSR by 2
// and CSET EQ, with no rotate.
func div_divis6_uint64(i uint64) (uint64, bool) {
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]30,"
// 386: "SETEQ"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_uint64 returns i/6 and whether i is not a multiple of 6. On
// 386 the checks expect constant loads of -1431655766 and -1431655765, MULL,
// SHRL by 2, SHLL by 30 and SETNE, and they require that neither a CALL nor
// a rotate appears. On arm64 they expect UMULH with -6148914691236517205,
// LSR by 2 and CSET NE, with no rotate.
func div_ndivis6_uint64(i uint64) (uint64, bool) {
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "MULL"
// 386: "SHRL [$]2,"
// 386: "SHLL [$]30,"
// 386: "SETNE"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "UMULH"
// arm64: "LSR [$]2,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_int8 returns i/6 and whether the signed value i is a multiple
// of 6. The checks expect a multiply by 171, a signed right shift by 10, and
// SETEQ (386) or CSET EQ (arm64), and they require that no rotate
// instruction (ROR or ROL) appears.
func div_divis6_int8(i int8) (int8, bool) {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]171,"
// 386: "SARL [$]10,"
// 386: "SHLL [$]1,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]171,"
// arm64: "MULW"
// arm64: "SBFX [$]10, R[0-9]+, [$]22,"
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_int8 returns i/6 and whether the signed value i is not a
// multiple of 6. The checks expect a multiply by 171, a signed right shift
// by 10, and SETNE (386) or CSET NE (arm64), and they require that no
// rotate instruction (ROR or ROL) appears.
func div_ndivis6_int8(i int8) (int8, bool) {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]171,"
// 386: "SARL [$]10,"
// 386: "SHLL [$]1,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]171,"
// arm64: "MULW"
// arm64: "SBFX [$]10, R[0-9]+, [$]22,"
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_int16 returns i/6 and whether the signed value i is a multiple
// of 6. The checks expect a multiply by 43691, a signed right shift by 18,
// and SETEQ (386) or CSET EQ (arm64), and they require that no rotate
// instruction (ROR or ROL) appears.
func div_divis6_int16(i int16) (int16, bool) {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]43691,"
// 386: "SARL [$]18,"
// 386: "SHLL [$]1,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]43691,"
// arm64: "MULW"
// arm64: "SBFX [$]18, R[0-9]+, [$]14,"
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_int16 returns i/6 and whether the signed value i is not a
// multiple of 6. The checks expect a multiply by 43691, a signed right shift
// by 18, and SETNE (386) or CSET NE (arm64), and they require that no
// rotate instruction (ROR or ROL) appears.
func div_ndivis6_int16(i int16) (int16, bool) {
// 386: "SARL [$]31,"
// 386: "IMUL3L [$]43691,"
// 386: "SARL [$]18,"
// 386: "SHLL [$]1,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]43691,"
// arm64: "MULW"
// arm64: "SBFX [$]18, R[0-9]+, [$]14,"
// arm64: "SUB R[0-9]+->31,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_int32 returns i/6 and whether the signed value i is a multiple
// of 6. The checks expect a multiply by a magic constant (-1431655765 on
// 386, 2863311531 on arm64), arithmetic right shifts, and SETEQ (386) or
// CSET EQ (arm64), and they require that no rotate instruction (ROR or ROL)
// appears.
func div_divis6_int32(i int32) (int32, bool) {
// 386: "SARL [$]31,"
// 386: "MOVL [$]-1431655765,"
// 386: "IMULL"
// 386: "SARL [$]2,"
// 386: "SHLL [$]1,"
// 386: "SETEQ"
// 386: -"RO[RL]"
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL "
// arm64: "ASR [$]34,"
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_int32 returns i/6 and whether the signed value i is not a
// multiple of 6. The checks expect a multiply by a magic constant
// (-1431655765 on 386, 2863311531 on arm64), arithmetic right shifts, and
// SETNE (386) or CSET NE (arm64), and they require that no rotate
// instruction (ROR or ROL) appears.
func div_ndivis6_int32(i int32) (int32, bool) {
// 386: "SARL [$]31,"
// 386: "MOVL [$]-1431655765,"
// 386: "IMULL"
// 386: "SARL [$]2,"
// 386: "SHLL [$]1,"
// 386: "SETNE"
// 386: -"RO[RL]"
// arm64: "MOVD [$]2863311531,"
// arm64: "MUL "
// arm64: "ASR [$]34,"
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}
// div_divis6_int64 returns i/6 and whether the signed value i is a multiple
// of 6. On 386 the checks expect ANDL and MOVL with the constants
// -1431655766 and -1431655765, SUBL and SBBL, MULL and SETEQ, and they
// require that SETLS, SETHI, CALL and rotate instructions are all absent.
// On arm64 they expect SMULH with -6148914691236517205, ADD, ASR by 2, SUB
// and CSET EQ, with no rotate.
func div_divis6_int64(i int64) (int64, bool) {
// 386: "ANDL [$]-1431655766,"
// 386: "ANDL [$]-1431655765,"
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "SUBL" "SBBL"
// 386: "MULL"
// 386: "SETEQ"
// 386: -"SET(LS|HI)"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH"
// arm64: "ADD"
// arm64: "ASR [$]2,"
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET EQ"
// arm64: -"RO[RL]"
return i/6, i%6 == 0
}
// div_ndivis6_int64 returns i/6 and whether the signed value i is not a
// multiple of 6. On 386 the checks expect ANDL and MOVL with the constants
// -1431655766 and -1431655765, SUBL and SBBL, MULL and SETNE, and they
// require that SETLS, SETHI, CALL and rotate instructions are all absent.
// On arm64 they expect SMULH with -6148914691236517205, ADD, ASR by 2, SUB
// and CSET NE, with no rotate.
func div_ndivis6_int64(i int64) (int64, bool) {
// 386: "ANDL [$]-1431655766,"
// 386: "ANDL [$]-1431655765,"
// 386: "MOVL [$]-1431655766,"
// 386: "MOVL [$]-1431655765,"
// 386: "SUBL" "SBBL"
// 386: "MULL"
// 386: "SETNE"
// 386: -"SET(LS|HI)"
// 386: -".*CALL"
// 386: -"RO[RL]"
// arm64: "MOVD [$]-6148914691236517205,"
// arm64: "SMULH"
// arm64: "ADD"
// arm64: "ASR [$]2,"
// arm64: "SUB R[0-9]+->63,"
// arm64: "CSET NE"
// arm64: -"RO[RL]"
return i/6, i%6 != 0
}