cmd/compile: add signed divisibility rules

"Division by invariant integers using multiplication" paper
by Granlund and Montgomery contains a method for directly computing
divisibility (x%c == 0 for c constant) by means of the modular inverse.
The method is further elaborated in "Hacker's Delight" by Warren Section 10-17

This general rule can compute divisibilty by one multiplication, and add
and a compare for odd divisors and an additional rotate for even divisors.

To apply the divisibility rule, we must take into account
the rules to rewrite x%c = x-((x/c)*c) and (x/c) for c constant on the first
optimization pass "opt".  This complicates the matching as we want to match
only in the cases where the result of (x/c) is not also needed.
So, we must match on the expanded form of (x/c) in the expression x == c*(x/c)
in the "late opt" pass after common subexpresion elimination.

Note, that if there is an intermediate opt pass introduced in the future we
could simplify these rules by delaying the magic division rewrite to "late opt"
and matching directly on (x/c) in the intermediate opt pass.

On amd64, the divisibility check is 30-45% faster.

name                     old time/op  new time/op  delta`
DivisiblePow2constI64-4  0.83ns ± 1%  0.82ns ± 0%     ~     (p=0.079 n=5+4)
DivisibleconstI64-4      2.68ns ± 1%  1.87ns ± 0%  -30.33%  (p=0.000 n=5+4)
DivisibleWDivconstI64-4  2.69ns ± 1%  2.71ns ± 3%     ~     (p=1.000 n=5+5)
DivisiblePow2constI32-4  1.15ns ± 1%  1.15ns ± 0%     ~     (p=0.238 n=5+4)
DivisibleconstI32-4      2.24ns ± 1%  1.20ns ± 0%  -46.48%  (p=0.016 n=5+4)
DivisibleWDivconstI32-4  2.27ns ± 1%  2.27ns ± 1%     ~     (p=0.683 n=5+5)
DivisiblePow2constI16-4  0.81ns ± 1%  0.82ns ± 1%     ~     (p=0.135 n=5+5)
DivisibleconstI16-4      2.11ns ± 2%  1.20ns ± 1%  -42.99%  (p=0.008 n=5+5)
DivisibleWDivconstI16-4  2.23ns ± 0%  2.27ns ± 2%   +1.79%  (p=0.029 n=4+4)
DivisiblePow2constI8-4   0.81ns ± 1%  0.81ns ± 1%     ~     (p=0.286 n=5+5)
DivisibleconstI8-4       2.13ns ± 3%  1.19ns ± 1%  -43.84%  (p=0.008 n=5+5)
DivisibleWDivconstI8-4   2.23ns ± 1%  2.25ns ± 1%     ~     (p=0.183 n=5+5)

Fixes #30282
Fixes #15806

Change-Id: Id20d78263a4fdfe0509229ae4dfa2fede83fc1d0
Reviewed-on: https://go-review.googlesource.com/c/go/+/173998
Run-TryBot: Brian Kessler <brian.m.kessler@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Brian Kessler 2019-04-05 14:05:07 -06:00 committed by Brad Fitzpatrick
parent e7d08b6fe6
commit 4d9dd35806
6 changed files with 7258 additions and 62 deletions

View file

@ -184,7 +184,7 @@ func TestMagicSigned(t *testing.T) {
-c - 1, -c, -c + 1, c - 1, c, c + 1,
-2*c - 1, -2 * c, -2*c + 1, 2*c - 1, 2 * c, 2*c + 1,
-mul - 1, -mul, -mul + 1, mul - 1, mul, mul + 1,
int64(1)<<n - 1, -int64(1)<<n + 1,
int64(1)<<(n-1) - 1, -int64(1) << (n - 1),
} {
X := new(big.Int).SetInt64(x)
if X.Cmp(Min) < 0 || X.Cmp(Max) > 0 {
@ -303,3 +303,108 @@ func TestDivisibleUnsigned(t *testing.T) {
}
}
}
func testDivisibleExhaustive(t *testing.T, n uint) {
minI := -int64(1) << (n - 1)
maxI := int64(1) << (n - 1)
for c := int64(1); c < maxI; c++ {
if !sdivisibleOK(n, int64(c)) {
continue
}
k := sdivisible(n, int64(c)).k
m := sdivisible(n, int64(c)).m
a := sdivisible(n, int64(c)).a
max := sdivisible(n, int64(c)).max
mask := ^uint64(0) >> (64 - n)
for i := minI; i < maxI; i++ {
want := i%c == 0
mul := (uint64(i)*m + a) & mask
rot := (mul>>uint(k) | mul<<(n-uint(k))) & mask
got := rot <= max
if want != got {
t.Errorf("signed divisible wrong for %d %% %d == 0: got %v, want %v (k=%d,m=%d,a=%d,max=%d)\n", i, c, got, want, k, m, a, max)
}
}
}
}
func TestDivisibleExhaustive8(t *testing.T) {
testDivisibleExhaustive(t, 8)
}
func TestDivisibleExhaustive16(t *testing.T) {
if testing.Short() {
t.Skip("slow test; skipping")
}
testDivisibleExhaustive(t, 16)
}
func TestDivisibleSigned(t *testing.T) {
One := new(big.Int).SetInt64(1)
for _, n := range [...]uint{8, 16, 32, 64} {
TwoNMinusOne := new(big.Int).Lsh(One, n-1)
Max := new(big.Int).Sub(TwoNMinusOne, One)
Min := new(big.Int).Neg(TwoNMinusOne)
for _, c := range [...]int64{
3,
5,
6,
7,
9,
10,
11,
12,
13,
14,
15,
17,
1<<7 - 1,
1<<7 + 1,
1<<15 - 1,
1<<15 + 1,
1<<31 - 1,
1<<31 + 1,
1<<63 - 1,
} {
if c>>(n-1) != 0 {
continue // not appropriate for the given n.
}
if !sdivisibleOK(n, int64(c)) {
t.Errorf("expected n=%d c=%d to pass\n", n, c)
}
k := sdivisible(n, int64(c)).k
m := sdivisible(n, int64(c)).m
a := sdivisible(n, int64(c)).a
max := sdivisible(n, int64(c)).max
mask := ^uint64(0) >> (64 - n)
C := new(big.Int).SetInt64(c)
// Find largest multiple of c.
Mul := new(big.Int).Div(Max, C)
Mul.Mul(Mul, C)
mul := Mul.Int64()
// Try some input values, mostly around multiples of c.
for _, x := range [...]int64{
-1, 1,
-c - 1, -c, -c + 1, c - 1, c, c + 1,
-2*c - 1, -2 * c, -2*c + 1, 2*c - 1, 2 * c, 2*c + 1,
-mul - 1, -mul, -mul + 1, mul - 1, mul, mul + 1,
int64(1)<<(n-1) - 1, -int64(1) << (n - 1),
} {
X := new(big.Int).SetInt64(x)
if X.Cmp(Min) < 0 || X.Cmp(Max) > 0 {
continue
}
want := x%c == 0
mul := (uint64(x)*m + a) & mask
rot := (mul>>uint(k) | mul<<(n-uint(k))) & mask
got := rot <= max
if want != got {
t.Errorf("signed divisible wrong for %d %% %d == 0: got %v, want %v (k=%d,m=%d,a=%d,max=%d)\n", x, c, got, want, k, m, a, max)
}
}
}
}
}