cmd/compile: optimize multiplication rules on loong64

Improve multiplication strength reduction on loong64, following the
approach of CL 626998, and additionally add recipes built from three
linear-combination instructions for loong64.

goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A6000-HV @ 2500.00MHz
                  |  bench.old   |              bench.new               |
                  |    sec/op    |    sec/op     vs base                |
MulconstI32/3       1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstI32/5       1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstI32/12       1.601n ± 0%    1.201n ± 0%  -24.98% (p=0.000 n=10)
MulconstI32/120     1.6010n ± 0%   0.8130n ± 0%  -49.22% (p=0.000 n=10)
MulconstI32/-120    1.6010n ± 0%   0.8109n ± 0%  -49.35% (p=0.000 n=10)
MulconstI32/65537   1.6275n ± 0%   0.8005n ± 0%  -50.81% (p=0.000 n=10)
MulconstI32/65538   1.6290n ± 0%   0.8004n ± 0%  -50.87% (p=0.000 n=10)
MulconstI64/3       1.6010n ± 0%   0.8004n ± 0%  -50.01% (p=0.000 n=10)
MulconstI64/5       1.6010n ± 0%   0.8004n ± 0%  -50.01% (p=0.000 n=10)
MulconstI64/12       1.601n ± 0%    1.201n ± 0%  -24.98% (p=0.000 n=10)
MulconstI64/120     1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstI64/-120    1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstI64/65537   1.6270n ± 0%   0.8005n ± 0%  -50.80% (p=0.000 n=10)
MulconstI64/65538   1.6290n ± 0%   0.8071n ± 1%  -50.45% (p=0.000 n=10)
MulconstU32/3       1.6010n ± 0%   0.8004n ± 0%  -50.01% (p=0.000 n=10)
MulconstU32/5       1.6010n ± 0%   0.8004n ± 0%  -50.01% (p=0.000 n=10)
MulconstU32/12       1.601n ± 0%    1.201n ± 0%  -24.98% (p=0.000 n=10)
MulconstU32/120     1.6010n ± 0%   0.8066n ± 0%  -49.62% (p=0.000 n=10)
MulconstU32/65537   1.6290n ± 0%   0.8005n ± 0%  -50.86% (p=0.000 n=10)
MulconstU32/65538   1.6280n ± 0%   0.8005n ± 0%  -50.83% (p=0.000 n=10)
MulconstU64/3       1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstU64/5       1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstU64/12       1.601n ± 0%    1.201n ± 0%  -24.98% (p=0.000 n=10)
MulconstU64/120     1.6010n ± 0%   0.8005n ± 0%  -50.00% (p=0.000 n=10)
MulconstU64/65537   1.6290n ± 0%   0.8005n ± 0%  -50.86% (p=0.000 n=10)
MulconstU64/65538   1.6300n ± 0%   0.8067n ± 0%  -50.51% (p=0.000 n=10)
geomean              1.609n        0.8537n       -46.95%

goos: linux
goarch: loong64
pkg: cmd/compile/internal/test
cpu: Loongson-3A5000 @ 2500.00MHz
                  |  bench.old   |              bench.new               |
                  |    sec/op    |    sec/op     vs base                |
MulconstI32/3       1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstI32/5       1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstI32/12       1.601n ± 0%    1.202n ± 0%  -24.92% (p=0.000 n=10)
MulconstI32/120     1.6020n ± 0%   0.8012n ± 0%  -49.99% (p=0.000 n=10)
MulconstI32/-120    1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstI32/65537   1.6020n ± 0%   0.8007n ± 0%  -50.02% (p=0.000 n=10)
MulconstI32/65538   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstI64/3       1.6015n ± 0%   0.8007n ± 0%  -50.00% (p=0.000 n=10)
MulconstI64/5       1.6020n ± 0%   0.8007n ± 0%  -50.02% (p=0.000 n=10)
MulconstI64/12       1.602n ± 0%    1.202n ± 0%  -25.00% (p=0.000 n=10)
MulconstI64/120     1.6030n ± 0%   0.8011n ± 0%  -50.02% (p=0.000 n=10)
MulconstI64/-120    1.6020n ± 0%   0.8007n ± 0%  -50.02% (p=0.000 n=10)
MulconstI64/65537   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstI64/65538   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstU32/3       1.6010n ± 0%   0.8006n ± 0%  -49.99% (p=0.000 n=10)
MulconstU32/5       1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstU32/12       1.601n ± 0%    1.202n ± 0%  -24.92% (p=0.000 n=10)
MulconstU32/120     1.6010n ± 0%   0.8006n ± 0%  -49.99% (p=0.000 n=10)
MulconstU32/65537   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstU32/65538   1.6020n ± 0%   0.8009n ± 0%  -50.01% (p=0.000 n=10)
MulconstU64/3       1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstU64/5       1.6010n ± 0%   0.8007n ± 0%  -49.98% (p=0.000 n=10)
MulconstU64/12       1.601n ± 0%    1.201n ± 0%  -24.98% (p=0.000 n=10)
MulconstU64/120     1.6020n ± 0%   0.8007n ± 0%  -50.02% (p=0.000 n=10)
MulconstU64/65537   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
MulconstU64/65538   1.6010n ± 0%   0.8007n ± 0%  -49.99% (p=0.000 n=10)
geomean              1.601n        0.8523n       -46.77%

Change-Id: I9fb0e47ca57875da171a347bf4828adfab41b875
Reviewed-on: https://go-review.googlesource.com/c/go/+/675455
Reviewed-by: Mark Freeman <mark@golang.org>
Reviewed-by: abner chenc <chenguoqi@loongson.cn>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: Keith Randall <khr@golang.org>
This commit is contained in:
Xiaolin Zhao 2025-05-22 16:21:10 +08:00 committed by Gopher Robot
parent eb7f515c4d
commit e071617222
7 changed files with 190 additions and 25 deletions

View file

@ -750,10 +750,10 @@
(SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0]) (SRLVconst [rc] (MOVBUreg x)) && rc >= 8 => (MOVVconst [0])
// mul by constant // mul by constant
(MULV x (MOVVconst [-1])) => (NEGV x)
(MULV _ (MOVVconst [0])) => (MOVVconst [0]) (MULV _ (MOVVconst [0])) => (MOVVconst [0])
(MULV x (MOVVconst [1])) => x (MULV x (MOVVconst [1])) => x
(MULV x (MOVVconst [c])) && isPowerOfTwo(c) => (SLLVconst [log64(c)] x)
(MULV x (MOVVconst [c])) && canMulStrengthReduce(config, c) => {mulStrengthReduce(v, x, c)}
// div by constant // div by constant
(DIVVU x (MOVVconst [1])) => x (DIVVU x (MOVVconst [1])) => x

View file

@ -0,0 +1,6 @@
// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Prefer addition when shifting left by one.
(SLLVconst [1] x) => (ADDV x x)

View file

@ -283,6 +283,8 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
c.RegSize = 8 c.RegSize = 8
c.lowerBlock = rewriteBlockLOONG64 c.lowerBlock = rewriteBlockLOONG64
c.lowerValue = rewriteValueLOONG64 c.lowerValue = rewriteValueLOONG64
c.lateLowerBlock = rewriteBlockLOONG64latelower
c.lateLowerValue = rewriteValueLOONG64latelower
c.registers = registersLOONG64[:] c.registers = registersLOONG64[:]
c.gpRegMask = gpRegMaskLOONG64 c.gpRegMask = gpRegMaskLOONG64
c.fpRegMask = fpRegMaskLOONG64 c.fpRegMask = fpRegMaskLOONG64
@ -562,6 +564,43 @@ func (c *Config) buildRecipes(arch string) {
return m.Block.NewValue2I(m.Pos, OpARM64SUBshiftLL, m.Type, int64(i), x, y) return m.Block.NewValue2I(m.Pos, OpARM64SUBshiftLL, m.Type, int64(i), x, y)
}) })
} }
case "loong64":
// - multiply is 4 cycles.
// - add/sub/shift are 1 cycle.
// On loong64, using a multiply also needs to load the constant into a register.
// TODO: figure out a happy medium.
mulCost = 45
// add
r(1, 1, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue2(m.Pos, OpLOONG64ADDV, m.Type, x, y)
})
// neg
r(-1, 0, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue1(m.Pos, OpLOONG64NEGV, m.Type, x)
})
// sub
r(1, -1, 10,
func(m, x, y *Value) *Value {
return m.Block.NewValue2(m.Pos, OpLOONG64SUBV, m.Type, x, y)
})
// regular shifts
for i := 1; i < 64; i++ {
c := 10
if i == 1 {
// Prefer x<<1 over x+x.
// Note that we eventually reverse this decision in LOONG64latelower.rules,
// but this makes shift combining rules in LOONG64.rules simpler.
c--
}
r(1<<i, 0, c,
func(m, x, y *Value) *Value {
return m.Block.NewValue1I(m.Pos, OpLOONG64SLLVconst, m.Type, int64(i), x)
})
}
} }
c.mulRecipes = map[int64]mulRecipe{} c.mulRecipes = map[int64]mulRecipe{}
@ -628,17 +667,58 @@ func (c *Config) buildRecipes(arch string) {
} }
} }
// Currently we only process 3 linear combination instructions for loong64.
if arch == "loong64" {
// Three-instruction recipes.
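// D: The first and the second are both single-instruction recipes applied to the original value, and their results are the third's two inputs.
// E: The first single-instruction recipe's result is the second's input, and the second's result is the third's input (each combined with the original value).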
// D
for _, first := range linearCombos {
for _, second := range linearCombos {
for _, third := range linearCombos {
x := third.a*(first.a+first.b) + third.b*(second.a+second.b)
cost := first.cost + second.cost + third.cost
old := c.mulRecipes[x]
if (old.build == nil || cost < old.cost) && cost < mulCost {
c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
v1 := first.build(m, v, v)
v2 := second.build(m, v, v)
return third.build(m, v1, v2)
}}
}
}
}
}
// E
for _, first := range linearCombos {
for _, second := range linearCombos {
for _, third := range linearCombos {
x := third.a*(second.a*(first.a+first.b)+second.b) + third.b
cost := first.cost + second.cost + third.cost
old := c.mulRecipes[x]
if (old.build == nil || cost < old.cost) && cost < mulCost {
c.mulRecipes[x] = mulRecipe{cost: cost, build: func(m, v *Value) *Value {
v1 := first.build(m, v, v)
v2 := second.build(m, v1, v)
return third.build(m, v2, v)
}}
}
}
}
}
}
// These cases should be handled specially by rewrite rules. // These cases should be handled specially by rewrite rules.
// (Otherwise v * 1 == (neg (neg v))) // (Otherwise v * 1 == (neg (neg v)))
delete(c.mulRecipes, 0) delete(c.mulRecipes, 0)
delete(c.mulRecipes, 1) delete(c.mulRecipes, 1)
// Currently we assume that it doesn't help to do 3 linear
// combination instructions.
// Currently: // Currently:
// len(c.mulRecipes) == 5984 on arm64 // len(c.mulRecipes) == 5984 on arm64
// 680 on amd64 // 680 on amd64
// 5984 on loong64
// This function takes ~2.5ms on arm64. // This function takes ~2.5ms on arm64.
//println(len(c.mulRecipes)) //println(len(c.mulRecipes))
} }

View file

@ -5537,20 +5537,8 @@ func rewriteValueLOONG64_OpLOONG64MOVWstorezeroidx(v *Value) bool {
func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool { func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
v_1 := v.Args[1] v_1 := v.Args[1]
v_0 := v.Args[0] v_0 := v.Args[0]
// match: (MULV x (MOVVconst [-1])) b := v.Block
// result: (NEGV x) config := b.Func.Config
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpLOONG64MOVVconst || auxIntToInt64(v_1.AuxInt) != -1 {
continue
}
v.reset(OpLOONG64NEGV)
v.AddArg(x)
return true
}
break
}
// match: (MULV _ (MOVVconst [0])) // match: (MULV _ (MOVVconst [0]))
// result: (MOVVconst [0]) // result: (MOVVconst [0])
for { for {
@ -5578,8 +5566,8 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
break break
} }
// match: (MULV x (MOVVconst [c])) // match: (MULV x (MOVVconst [c]))
// cond: isPowerOfTwo(c) // cond: canMulStrengthReduce(config, c)
// result: (SLLVconst [log64(c)] x) // result: {mulStrengthReduce(v, x, c)}
for { for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 { for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0 x := v_0
@ -5587,12 +5575,10 @@ func rewriteValueLOONG64_OpLOONG64MULV(v *Value) bool {
continue continue
} }
c := auxIntToInt64(v_1.AuxInt) c := auxIntToInt64(v_1.AuxInt)
if !(isPowerOfTwo(c)) { if !(canMulStrengthReduce(config, c)) {
continue continue
} }
v.reset(OpLOONG64SLLVconst) v.copyOf(mulStrengthReduce(v, x, c))
v.AuxInt = int64ToAuxInt(log64(c))
v.AddArg(x)
return true return true
} }
break break

View file

@ -0,0 +1,29 @@
// Code generated from _gen/LOONG64latelower.rules using 'go generate'; DO NOT EDIT.
package ssa
// rewriteValueLOONG64latelower dispatches on v.Op to the late-lowering
// rewrite function for that op and returns its result. It returns false
// for any op that has no late-lowering rule; in this file only
// OpLOONG64SLLVconst has one.
func rewriteValueLOONG64latelower(v *Value) bool {
switch v.Op {
case OpLOONG64SLLVconst:
return rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v)
}
return false
}
// rewriteValueLOONG64latelower_OpLOONG64SLLVconst rewrites a left shift by
// one into an addition of the operand with itself:
// (SLLVconst [1] x) => (ADDV x x).
// It returns true if v was rewritten; shifts by any other amount are left
// untouched and the function returns false.
func rewriteValueLOONG64latelower_OpLOONG64SLLVconst(v *Value) bool {
v_0 := v.Args[0]
// match: (SLLVconst [1] x)
// result: (ADDV x x)
for {
// Only a shift amount of exactly 1 matches this rule.
if auxIntToInt64(v.AuxInt) != 1 {
break
}
x := v_0
// Replace v in place with ADDV, using x as both operands.
v.reset(OpLOONG64ADDV)
v.AddArg2(x, x)
return true
}
return false
}
// rewriteBlockLOONG64latelower always returns false: it performs no
// block-level rewrites.
func rewriteBlockLOONG64latelower(b *Block) bool {
return false
}

View file

@ -228,6 +228,7 @@ func Pow2Muls(n1, n2 int) (int, int) {
// 386:"SHLL\t[$]5",-"IMULL" // 386:"SHLL\t[$]5",-"IMULL"
// arm:"SLL\t[$]5",-"MUL" // arm:"SLL\t[$]5",-"MUL"
// arm64:"LSL\t[$]5",-"MUL" // arm64:"LSL\t[$]5",-"MUL"
// loong64:"SLLV\t[$]5",-"MULV"
// ppc64x:"SLD\t[$]5",-"MUL" // ppc64x:"SLD\t[$]5",-"MUL"
a := n1 * 32 a := n1 * 32
@ -235,6 +236,7 @@ func Pow2Muls(n1, n2 int) (int, int) {
// 386:"SHLL\t[$]6",-"IMULL" // 386:"SHLL\t[$]6",-"IMULL"
// arm:"SLL\t[$]6",-"MUL" // arm:"SLL\t[$]6",-"MUL"
// arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL` // arm64:`NEG\sR[0-9]+<<6,\sR[0-9]+`,-`LSL`,-`MUL`
// loong64:"SLLV\t[$]6",-"MULV"
// ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL" // ppc64x:"SLD\t[$]6","NEG\\sR[0-9]+,\\sR[0-9]+",-"MUL"
b := -64 * n2 b := -64 * n2
@ -255,11 +257,13 @@ func Mul_96(n int) int {
// 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL` // 386:`SHLL\t[$]5`,`LEAL\t\(.*\)\(.*\*2\),`,-`IMULL`
// arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` // arm64:`LSL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
// arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL` // arm:`SLL\t[$]5`,`ADD\sR[0-9]+<<1,\sR[0-9]+`,-`MUL`
// loong64:"ADDVU","SLLV\t[$]5",-"MULV"
// s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD` // s390x:`SLD\t[$]5`,`SLD\t[$]6`,-`MULLD`
return n * 96 return n * 96
} }
func Mul_n120(n int) int { func Mul_n120(n int) int {
// loong64:"SLLV\t[$]3","SLLV\t[$]7","SUBVU",-"MULV"
// s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD` // s390x:`SLD\t[$]3`,`SLD\t[$]7`,-`MULLD`
return n * -120 return n * -120
} }

View file

@ -12,301 +12,361 @@ package codegen
func m0(x int64) int64 { func m0(x int64) int64 {
// amd64: "XORL" // amd64: "XORL"
// arm64: "MOVD\tZR" // arm64: "MOVD\tZR"
// loong64: "MOVV\t[$]0"
return x * 0 return x * 0
} }
func m2(x int64) int64 { func m2(x int64) int64 {
// amd64: "ADDQ" // amd64: "ADDQ"
// arm64: "ADD" // arm64: "ADD"
// loong64: "ADDVU"
return x * 2 return x * 2
} }
func m3(x int64) int64 { func m3(x int64) int64 {
// amd64: "LEAQ\t.*[*]2" // amd64: "LEAQ\t.*[*]2"
// arm64: "ADD\tR[0-9]+<<1," // arm64: "ADD\tR[0-9]+<<1,"
// loong64: "ADDVU","ADDVU"
return x * 3 return x * 3
} }
func m4(x int64) int64 { func m4(x int64) int64 {
// amd64: "SHLQ\t[$]2," // amd64: "SHLQ\t[$]2,"
// arm64: "LSL\t[$]2," // arm64: "LSL\t[$]2,"
// loong64: "SLLV\t[$]2,"
return x * 4 return x * 4
} }
func m5(x int64) int64 { func m5(x int64) int64 {
// amd64: "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]4"
// arm64: "ADD\tR[0-9]+<<2," // arm64: "ADD\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]2,","ADDVU"
return x * 5 return x * 5
} }
func m6(x int64) int64 { func m6(x int64) int64 {
// amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2" // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]2"
// arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1," // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<1,"
// loong64: "ADDVU","ADDVU","ADDVU"
return x * 6 return x * 6
} }
func m7(x int64) int64 { func m7(x int64) int64 {
// amd64: "LEAQ\t.*[*]2" // amd64: "LEAQ\t.*[*]2"
// arm64: "LSL\t[$]3,", "SUB\tR[0-9]+," // arm64: "LSL\t[$]3,", "SUB\tR[0-9]+,"
// loong64: "SLLV\t[$]3,","SUBVU"
return x * 7 return x * 7
} }
func m8(x int64) int64 { func m8(x int64) int64 {
// amd64: "SHLQ\t[$]3," // amd64: "SHLQ\t[$]3,"
// arm64: "LSL\t[$]3," // arm64: "LSL\t[$]3,"
// loong64: "SLLV\t[$]3,"
return x * 8 return x * 8
} }
func m9(x int64) int64 { func m9(x int64) int64 {
// amd64: "LEAQ\t.*[*]8" // amd64: "LEAQ\t.*[*]8"
// arm64: "ADD\tR[0-9]+<<3," // arm64: "ADD\tR[0-9]+<<3,"
// loong64: "SLLV\t[$]3,","ADDVU"
return x * 9 return x * 9
} }
func m10(x int64) int64 { func m10(x int64) int64 {
// amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]4"
// arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2," // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
// loong64: "ADDVU","SLLV\t[$]3,","ADDVU"
return x * 10 return x * 10
} }
func m11(x int64) int64 { func m11(x int64) int64 {
// amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2" // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]2"
// arm64: "MOVD\t[$]11,", "MUL" // arm64: "MOVD\t[$]11,", "MUL"
// loong64: "MOVV\t[$]11,", "MULV"
return x * 11 return x * 11
} }
func m12(x int64) int64 { func m12(x int64) int64 {
// amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2," // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]2,"
// arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<1,"
// loong64: "ADDVU","ADDVU","SLLV\t[$]2,"
return x * 12 return x * 12
} }
func m13(x int64) int64 { func m13(x int64) int64 {
// amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
// arm64: "MOVD\t[$]13,", "MUL" // arm64: "MOVD\t[$]13,", "MUL"
// loong64: "MOVV\t[$]13,","MULV"
return x * 13 return x * 13
} }
func m14(x int64) int64 { func m14(x int64) int64 {
// amd64: "IMUL3Q\t[$]14," // amd64: "IMUL3Q\t[$]14,"
// arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1," // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+<<1,"
// loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
return x * 14 return x * 14
} }
func m15(x int64) int64 { func m15(x int64) int64 {
// amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]4"
// arm64: "LSL\t[$]4,", "SUB\tR[0-9]+," // arm64: "LSL\t[$]4,", "SUB\tR[0-9]+,"
// loong64: "SLLV\t[$]4,","SUBVU"
return x * 15 return x * 15
} }
func m16(x int64) int64 { func m16(x int64) int64 {
// amd64: "SHLQ\t[$]4," // amd64: "SHLQ\t[$]4,"
// arm64: "LSL\t[$]4," // arm64: "LSL\t[$]4,"
// loong64: "SLLV\t[$]4,"
return x * 16 return x * 16
} }
func m17(x int64) int64 { func m17(x int64) int64 {
// amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
// arm64: "ADD\tR[0-9]+<<4," // arm64: "ADD\tR[0-9]+<<4,"
// loong64: "SLLV\t[$]4,","ADDVU"
return x * 17 return x * 17
} }
func m18(x int64) int64 { func m18(x int64) int64 {
// amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8" // amd64: "LEAQ\t.*[*]1", "LEAQ\t.*[*]8"
// arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3," // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
// loong64: "ADDVU","SLLV\t[$]4,","ADDVU"
return x * 18 return x * 18
} }
func m19(x int64) int64 { func m19(x int64) int64 {
// amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2" // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]2"
// arm64: "MOVD\t[$]19,", "MUL" // arm64: "MOVD\t[$]19,", "MUL"
// loong64: "MOVV\t[$]19,","MULV"
return x * 19 return x * 19
} }
func m20(x int64) int64 { func m20(x int64) int64 {
// amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2," // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]2,"
// arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]2,","SLLV\t[$]4,","ADDVU"
return x * 20 return x * 20
} }
func m21(x int64) int64 { func m21(x int64) int64 {
// amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
// arm64: "MOVD\t[$]21,", "MUL" // arm64: "MOVD\t[$]21,", "MUL"
// loong64: "MOVV\t[$]21,","MULV"
return x * 21 return x * 21
} }
func m22(x int64) int64 { func m22(x int64) int64 {
// amd64: "IMUL3Q\t[$]22," // amd64: "IMUL3Q\t[$]22,"
// arm64: "MOVD\t[$]22,", "MUL" // arm64: "MOVD\t[$]22,", "MUL"
// loong64: "MOVV\t[$]22,","MULV"
return x * 22 return x * 22
} }
func m23(x int64) int64 { func m23(x int64) int64 {
// amd64: "IMUL3Q\t[$]23," // amd64: "IMUL3Q\t[$]23,"
// arm64: "MOVD\t[$]23,", "MUL" // arm64: "MOVD\t[$]23,", "MUL"
// loong64: "MOVV\t[$]23,","MULV"
return x * 23 return x * 23
} }
func m24(x int64) int64 { func m24(x int64) int64 {
// amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3," // amd64: "LEAQ\t.*[*]2", "SHLQ\t[$]3,"
// arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1," // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<1,"
// loong64: "ADDVU","ADDVU","SLLV\t[$]3,"
return x * 24 return x * 24
} }
func m25(x int64) int64 { func m25(x int64) int64 {
// amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]4", "LEAQ\t.*[*]4"
// arm64: "MOVD\t[$]25,", "MUL" // arm64: "MOVD\t[$]25,", "MUL"
// loong64: "MOVV\t[$]25,","MULV"
return x * 25 return x * 25
} }
func m26(x int64) int64 { func m26(x int64) int64 {
// amd64: "IMUL3Q\t[$]26," // amd64: "IMUL3Q\t[$]26,"
// arm64: "MOVD\t[$]26,", "MUL" // arm64: "MOVD\t[$]26,", "MUL"
// loong64: "MOVV\t[$]26,","MULV"
return x * 26 return x * 26
} }
func m27(x int64) int64 { func m27(x int64) int64 {
// amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8" // amd64: "LEAQ\t.*[*]2", "LEAQ\t.*[*]8"
// arm64: "MOVD\t[$]27,", "MUL" // arm64: "MOVD\t[$]27,", "MUL"
// loong64: "MOVV\t[$]27,","MULV"
return x * 27 return x * 27
} }
func m28(x int64) int64 { func m28(x int64) int64 {
// amd64: "IMUL3Q\t[$]28," // amd64: "IMUL3Q\t[$]28,"
// arm64: "LSL\t[$]5, "SUB\tR[0-9]+<<2," // arm64: "LSL\t[$]5, "SUB\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]5,","SLLV\t[$]2,","SUBVU"
return x * 28 return x * 28
} }
func m29(x int64) int64 { func m29(x int64) int64 {
// amd64: "IMUL3Q\t[$]29," // amd64: "IMUL3Q\t[$]29,"
// arm64: "MOVD\t[$]29,", "MUL" // arm64: "MOVD\t[$]29,", "MUL"
// loong64: "MOVV\t[$]29,","MULV"
return x * 29 return x * 29
} }
func m30(x int64) int64 { func m30(x int64) int64 {
// amd64: "IMUL3Q\t[$]30," // amd64: "IMUL3Q\t[$]30,"
// arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<1," // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+<<1,"
// loong64: "ADDVU","SLLV\t[$]5,","SUBVU"
return x * 30 return x * 30
} }
func m31(x int64) int64 { func m31(x int64) int64 {
// amd64: "SHLQ\t[$]5,", "SUBQ" // amd64: "SHLQ\t[$]5,", "SUBQ"
// arm64: "LSL\t[$]5,", "SUB\tR[0-9]+," // arm64: "LSL\t[$]5,", "SUB\tR[0-9]+,"
// loong64: "SLLV\t[$]5,","SUBVU"
return x * 31 return x * 31
} }
func m32(x int64) int64 { func m32(x int64) int64 {
// amd64: "SHLQ\t[$]5," // amd64: "SHLQ\t[$]5,"
// arm64: "LSL\t[$]5," // arm64: "LSL\t[$]5,"
// loong64: "SLLV\t[$]5,"
return x * 32 return x * 32
} }
func m33(x int64) int64 { func m33(x int64) int64 {
// amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8" // amd64: "SHLQ\t[$]2,", "LEAQ\t.*[*]8"
// arm64: "ADD\tR[0-9]+<<5," // arm64: "ADD\tR[0-9]+<<5,"
// loong64: "SLLV\t[$]5,","ADDVU"
return x * 33 return x * 33
} }
func m34(x int64) int64 { func m34(x int64) int64 {
// amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2" // amd64: "SHLQ\t[$]5,", "LEAQ\t.*[*]2"
// arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4," // arm64: "ADD\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
// loong64: "ADDVU","SLLV\t[$]5,","ADDVU"
return x * 34 return x * 34
} }
func m35(x int64) int64 { func m35(x int64) int64 {
// amd64: "IMUL3Q\t[$]35," // amd64: "IMUL3Q\t[$]35,"
// arm64: "MOVD\t[$]35,", "MUL" // arm64: "MOVD\t[$]35,", "MUL"
// loong64: "MOVV\t[$]35,","MULV"
return x * 35 return x * 35
} }
func m36(x int64) int64 { func m36(x int64) int64 {
// amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2," // amd64: "LEAQ\t.*[*]8", "SHLQ\t[$]2,"
// arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3," // arm64: "LSL\t[$]2,", "ADD\tR[0-9]+<<3,"
// loong64: "SLLV\t[$]2,","SLLV\t[$]5,","ADDVU"
return x * 36 return x * 36
} }
func m37(x int64) int64 { func m37(x int64) int64 {
// amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4" // amd64: "LEAQ\t.*[*]8", "LEAQ\t.*[*]4"
// arm64: "MOVD\t[$]37,", "MUL" // arm64: "MOVD\t[$]37,", "MUL"
// loong64: "MOVV\t[$]37,","MULV"
return x * 37 return x * 37
} }
func m38(x int64) int64 { func m38(x int64) int64 {
// amd64: "IMUL3Q\t[$]38," // amd64: "IMUL3Q\t[$]38,"
// arm64: "MOVD\t[$]38,", "MUL" // arm64: "MOVD\t[$]38,", "MUL"
// loong64: "MOVV\t[$]38,","MULV"
return x * 38 return x * 38
} }
func m39(x int64) int64 { func m39(x int64) int64 {
// amd64: "IMUL3Q\t[$]39," // amd64: "IMUL3Q\t[$]39,"
// arm64: "MOVD\t[$]39,", "MUL" // arm64: "MOVD\t[$]39,", "MUL"
// loong64: "MOVV\t[$]39,", "MULV"
return x * 39 return x * 39
} }
func m40(x int64) int64 { func m40(x int64) int64 {
// amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3," // amd64: "LEAQ\t.*[*]4", "SHLQ\t[$]3,"
// arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2," // arm64: "LSL\t[$]3,", "ADD\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]3,","SLLV\t[$]5,","ADDVU"
return x * 40 return x * 40
} }
func mn1(x int64) int64 { func mn1(x int64) int64 {
// amd64: "NEGQ\t" // amd64: "NEGQ\t"
// arm64: "NEG\tR[0-9]+," // arm64: "NEG\tR[0-9]+,"
// loong64: "SUBVU\tR[0-9], R0,"
return x * -1 return x * -1
} }
func mn2(x int64) int64 { func mn2(x int64) int64 {
// amd64: "NEGQ", "ADDQ" // amd64: "NEGQ", "ADDQ"
// arm64: "NEG\tR[0-9]+<<1," // arm64: "NEG\tR[0-9]+<<1,"
// loong64: "ADDVU","SUBVU\tR[0-9], R0,"
return x * -2 return x * -2
} }
func mn3(x int64) int64 { func mn3(x int64) int64 {
// amd64: "NEGQ", "LEAQ\t.*[*]2" // amd64: "NEGQ", "LEAQ\t.*[*]2"
// arm64: "SUB\tR[0-9]+<<2," // arm64: "SUB\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]2,","SUBVU"
return x * -3 return x * -3
} }
func mn4(x int64) int64 { func mn4(x int64) int64 {
// amd64: "NEGQ", "SHLQ\t[$]2," // amd64: "NEGQ", "SHLQ\t[$]2,"
// arm64: "NEG\tR[0-9]+<<2," // arm64: "NEG\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]2,","SUBVU\tR[0-9], R0,"
return x * -4 return x * -4
} }
func mn5(x int64) int64 { func mn5(x int64) int64 {
// amd64: "NEGQ", "LEAQ\t.*[*]4" // amd64: "NEGQ", "LEAQ\t.*[*]4"
// arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2," // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<2,"
// loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]2,","SUBVU"
return x * -5 return x * -5
} }
func mn6(x int64) int64 { func mn6(x int64) int64 {
// amd64: "IMUL3Q\t[$]-6," // amd64: "IMUL3Q\t[$]-6,"
// arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2," // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<2,"
// loong64: "ADDVU","SLLV\t[$]3,","SUBVU"
return x * -6 return x * -6
} }
func mn7(x int64) int64 { func mn7(x int64) int64 {
// amd64: "NEGQ", "LEAQ\t.*[*]8" // amd64: "NEGQ", "LEAQ\t.*[*]8"
// arm64: "SUB\tR[0-9]+<<3," // arm64: "SUB\tR[0-9]+<<3,"
// loong64: "SLLV\t[$]3","SUBVU"
return x * -7 return x * -7
} }
func mn8(x int64) int64 { func mn8(x int64) int64 {
// amd64: "NEGQ", "SHLQ\t[$]3," // amd64: "NEGQ", "SHLQ\t[$]3,"
// arm64: "NEG\tR[0-9]+<<3," // arm64: "NEG\tR[0-9]+<<3,"
// loong64: "SLLV\t[$]3","SUBVU\tR[0-9], R0,"
return x * -8 return x * -8
} }
func mn9(x int64) int64 { func mn9(x int64) int64 {
// amd64: "NEGQ", "LEAQ\t.*[*]8" // amd64: "NEGQ", "LEAQ\t.*[*]8"
// arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3," // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<3,"
// loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]3","SUBVU"
return x * -9 return x * -9
} }
func mn10(x int64) int64 { func mn10(x int64) int64 {
// amd64: "IMUL3Q\t[$]-10," // amd64: "IMUL3Q\t[$]-10,"
// arm64: "MOVD\t[$]-10,", "MUL" // arm64: "MOVD\t[$]-10,", "MUL"
// loong64: "MOVV\t[$]-10","MULV"
return x * -10 return x * -10
} }
func mn11(x int64) int64 { func mn11(x int64) int64 {
// amd64: "IMUL3Q\t[$]-11," // amd64: "IMUL3Q\t[$]-11,"
// arm64: "MOVD\t[$]-11,", "MUL" // arm64: "MOVD\t[$]-11,", "MUL"
// loong64: "MOVV\t[$]-11","MULV"
return x * -11 return x * -11
} }
func mn12(x int64) int64 { func mn12(x int64) int64 {
// amd64: "IMUL3Q\t[$]-12," // amd64: "IMUL3Q\t[$]-12,"
// arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2," // arm64: "LSL\t[$]2,", "SUB\tR[0-9]+<<2,"
// loong64: "SLLV\t[$]2,","SLLV\t[$]4,","SUBVU"
return x * -12 return x * -12
} }
func mn13(x int64) int64 { func mn13(x int64) int64 {
// amd64: "IMUL3Q\t[$]-13," // amd64: "IMUL3Q\t[$]-13,"
// arm64: "MOVD\t[$]-13,", "MUL" // arm64: "MOVD\t[$]-13,", "MUL"
// loong64: "MOVV\t[$]-13","MULV"
return x * -13 return x * -13
} }
func mn14(x int64) int64 { func mn14(x int64) int64 {
// amd64: "IMUL3Q\t[$]-14," // amd64: "IMUL3Q\t[$]-14,"
// arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3," // arm64: "ADD\tR[0-9]+,", "SUB\tR[0-9]+<<3,"
// loong64: "ADDVU","SLLV\t[$]4,","SUBVU"
return x * -14 return x * -14
} }
func mn15(x int64) int64 { func mn15(x int64) int64 {
// amd64: "SHLQ\t[$]4,", "SUBQ" // amd64: "SHLQ\t[$]4,", "SUBQ"
// arm64: "SUB\tR[0-9]+<<4," // arm64: "SUB\tR[0-9]+<<4,"
// loong64: "SLLV\t[$]4,","SUBVU"
return x * -15 return x * -15
} }
func mn16(x int64) int64 { func mn16(x int64) int64 {
// amd64: "NEGQ", "SHLQ\t[$]4," // amd64: "NEGQ", "SHLQ\t[$]4,"
// arm64: "NEG\tR[0-9]+<<4," // arm64: "NEG\tR[0-9]+<<4,"
// loong64: "SLLV\t[$]4,","SUBVU\tR[0-9], R0,"
return x * -16 return x * -16
} }
func mn17(x int64) int64 { func mn17(x int64) int64 {
// amd64: "IMUL3Q\t[$]-17," // amd64: "IMUL3Q\t[$]-17,"
// arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4," // arm64: "NEG\tR[0-9]+,", "ADD\tR[0-9]+<<4,"
// loong64: "SUBVU\tR[0-9], R0,","SLLV\t[$]4,","SUBVU"
return x * -17 return x * -17
} }
func mn18(x int64) int64 { func mn18(x int64) int64 {
// amd64: "IMUL3Q\t[$]-18," // amd64: "IMUL3Q\t[$]-18,"
// arm64: "MOVD\t[$]-18,", "MUL" // arm64: "MOVD\t[$]-18,", "MUL"
// loong64: "MOVV\t[$]-18","MULV"
return x * -18 return x * -18
} }
func mn19(x int64) int64 { func mn19(x int64) int64 {
// amd64: "IMUL3Q\t[$]-19," // amd64: "IMUL3Q\t[$]-19,"
// arm64: "MOVD\t[$]-19,", "MUL" // arm64: "MOVD\t[$]-19,", "MUL"
// loong64: "MOVV\t[$]-19","MULV"
return x * -19 return x * -19
} }
func mn20(x int64) int64 { func mn20(x int64) int64 {
// amd64: "IMUL3Q\t[$]-20," // amd64: "IMUL3Q\t[$]-20,"
// arm64: "MOVD\t[$]-20,", "MUL" // arm64: "MOVD\t[$]-20,", "MUL"
// loong64: "MOVV\t[$]-20","MULV"
return x * -20 return x * -20
} }