cmd/compile: add math.{Ceil,Trunc,Floor,RoundToEven} intrinsics on loong64

goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A6000 @ 2500.00MHz
                |   old.txt    |               new.txt                |
                |    sec/op    |    sec/op     vs base                |
Ceil-8            2.9640n ± 1%   0.8015n ± 0%  -72.96% (p=0.000 n=10)
Ceil32-8          2.9585n ± 0%   0.8017n ± 0%  -72.90% (p=0.000 n=10)
Floor-8           2.9045n ± 0%   0.8014n ± 0%  -72.41% (p=0.000 n=10)
Floor32-8         2.9275n ± 0%   0.8014n ± 0%  -72.62% (p=0.000 n=10)
RoundToEven-8     2.5815n ± 1%   0.8014n ± 0%  -68.96% (p=0.000 n=10)
RoundToEven32-8   2.6795n ± 0%   0.8021n ± 0%  -70.06% (p=0.000 n=10)
Trunc-8           2.9220n ± 1%   0.8014n ± 0%  -72.57% (p=0.000 n=10)
Trunc32-8         2.9245n ± 0%   0.8016n ± 0%  -72.59% (p=0.000 n=10)
geomean            2.854n        0.8016n       -71.92%

Change-Id: Ia7f58ea4f6832d3dc44822f28a065d73f077f217
Reviewed-on: https://go-review.googlesource.com/c/go/+/770960
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
This commit is contained in:
Guoqi Chen 2026-04-23 21:00:51 +08:00 committed by Gopher Robot
parent 4e0783368b
commit 3fdac6780b
7 changed files with 133 additions and 3 deletions

View file

@ -539,7 +539,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
case ssa.OpLOONG64VPCNT64,
ssa.OpLOONG64VPCNT32,
ssa.OpLOONG64VPCNT16:
ssa.OpLOONG64VPCNT16,
ssa.OpLOONG64FRINTND,
ssa.OpLOONG64FRINTZD,
ssa.OpLOONG64FRINTPD,
ssa.OpLOONG64FRINTMD:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0

View file

@ -207,10 +207,16 @@
(PopCount16 <t> x) => (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
// math package intrinsics
(Sqrt ...) => (SQRTD ...)
(Sqrt ...) => (SQRTD ...)
(Sqrt32 ...) => (SQRTF ...)
(Abs ...) => (ABSD ...)
(Copysign ...) => (FCOPYSGD ...)
(Copysign ...) => (FCOPYSGD ...)
(RoundToEven ...) => (FRINTND ...)
(Floor ...) => (FRINTMD ...)
(Ceil ...) => (FRINTPD ...)
(Trunc ...) => (FRINTZD ...)
(Min(64|32)F ...) => (FMIN(D|F) ...)
(Max(64|32)F ...) => (FMAX(D|F) ...)

View file

@ -355,6 +355,12 @@ func init() {
{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64
{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32
// 64-bit floating-point round to an integral value; the result stays in 64-bit FP format.
// The comment on each op gives its rounding direction with sample inputs.
{name: "FRINTND", argLength: 1, reg: fp11, asm: "VFRINTRNED"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2)
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "VFRINTRMD"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1)
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "VFRINTRPD"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0)
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "VFRINTRZD"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0)
// Round ops to block fused-multiply-add extraction.
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},

View file

@ -4650,6 +4650,10 @@ const (
OpLOONG64TRUNCDV
OpLOONG64MOVFD
OpLOONG64MOVDF
OpLOONG64FRINTND
OpLOONG64FRINTMD
OpLOONG64FRINTPD
OpLOONG64FRINTZD
OpLOONG64LoweredRound32F
OpLOONG64LoweredRound64F
OpLOONG64CALLstatic
@ -72009,6 +72013,58 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "FRINTND",
argLen: 1,
asm: loong64.AVFRINTRNED,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTMD",
argLen: 1,
asm: loong64.AVFRINTRMD,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTPD",
argLen: 1,
asm: loong64.AVFRINTRPD,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "FRINTZD",
argLen: 1,
asm: loong64.AVFRINTRZD,
reg: regInfo{
inputs: []inputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
outputs: []outputInfo{
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
},
},
},
{
name: "LoweredRound32F",
argLen: 1,

View file

@ -157,6 +157,9 @@ func rewriteValueLOONG64(v *Value) bool {
case OpBswap64:
v.Op = OpLOONG64REVBV
return true
case OpCeil:
v.Op = OpLOONG64FRINTPD
return true
case OpClosureCall:
v.Op = OpLOONG64CALLclosure
return true
@ -286,6 +289,9 @@ func rewriteValueLOONG64(v *Value) bool {
case OpFMA:
v.Op = OpLOONG64FMADDD
return true
case OpFloor:
v.Op = OpLOONG64FRINTMD
return true
case OpGetCallerPC:
v.Op = OpLOONG64LoweredGetCallerPC
return true
@ -696,6 +702,9 @@ func rewriteValueLOONG64(v *Value) bool {
case OpRound64F:
v.Op = OpLOONG64LoweredRound64F
return true
case OpRoundToEven:
v.Op = OpLOONG64FRINTND
return true
case OpRsh16Ux16:
return rewriteValueLOONG64_OpRsh16Ux16(v)
case OpRsh16Ux32:
@ -824,6 +833,9 @@ func rewriteValueLOONG64(v *Value) bool {
case OpTailCallInter:
v.Op = OpLOONG64CALLtailinter
return true
case OpTrunc:
v.Op = OpLOONG64FRINTZD
return true
case OpTrunc16to8:
v.Op = OpCopy
return true

View file

@ -888,6 +888,48 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
makeRoundAMD64(ssa.OpTrunc),
sys.AMD64)
// makeRoundLoong64 returns an intrinsic builder for the float64 rounding
// operation op. The generated code checks the Loong64HasLSX flag at run
// time: if it is set, op is applied directly to the argument; otherwise the
// call to the ordinary Go implementation is emitted instead.
makeRoundLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
// Load the boolean feature flag ir.Syms.Loong64HasLSX (addressed off s.sb).
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
v := s.load(types.Types[types.TBOOL], addr)
// Close the current block with a conditional branch controlled by the flag.
b := s.endBlock()
b.Kind = ssa.BlockIf
b.SetControl(v)
bTrue := s.f.NewBlock(ssa.BlockPlain)
bFalse := s.f.NewBlock(ssa.BlockPlain)
bEnd := s.f.NewBlock(ssa.BlockPlain)
// Edge order matters: bTrue is added first, bFalse second.
b.AddEdgeTo(bTrue)
b.AddEdgeTo(bFalse)
b.Likely = ssa.BranchLikely // most loong64 machines support LSX, so favor the intrinsic path
// We have the intrinsic - use it directly on the single argument.
s.startBlock(bTrue)
s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
s.endBlock().AddEdgeTo(bEnd)
// No LSX - fall back to a normal call of the pure Go version.
s.startBlock(bFalse)
s.vars[n] = s.callResult(n, callNormal) // result type is types.Types[types.TFLOAT64]
s.endBlock().AddEdgeTo(bEnd)
// Merge results: both arms stored their value in s.vars[n]; read it back in the join block.
s.startBlock(bEnd)
return s.variable(n, types.Types[types.TFLOAT64])
}
}
addF("math", "RoundToEven",
makeRoundLoong64(ssa.OpRoundToEven),
sys.Loong64)
addF("math", "Floor",
makeRoundLoong64(ssa.OpFloor),
sys.Loong64)
addF("math", "Ceil",
makeRoundLoong64(ssa.OpCeil),
sys.Loong64)
addF("math", "Trunc",
makeRoundLoong64(ssa.OpTrunc),
sys.Loong64)
/******** math/bits ********/
addF("math/bits", "TrailingZeros64",
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {

View file

@ -432,6 +432,10 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
{"loong64", "math", "Copysign"}: struct{}{},
{"loong64", "math", "FMA"}: struct{}{},
{"loong64", "math", "sqrt"}: struct{}{},
{"loong64", "math", "Ceil"}: struct{}{},
{"loong64", "math", "Floor"}: struct{}{},
{"loong64", "math", "RoundToEven"}: struct{}{},
{"loong64", "math", "Trunc"}: struct{}{},
{"loong64", "math/big", "mulWW"}: struct{}{},
{"loong64", "math/bits", "Add"}: struct{}{},
{"loong64", "math/bits", "Add64"}: struct{}{},