mirror of
https://github.com/golang/go.git
synced 2026-06-28 03:40:37 +00:00
cmd/compile: add math.{Ceil,Trunc,Floor,RoundToEven} intrinsics on loong64
goos: linux
goarch: loong64
pkg: math
cpu: Loongson-3A6000 @ 2500.00MHz
| old.txt | new.txt |
| sec/op | sec/op vs base |
Ceil-8 2.9640n ± 1% 0.8015n ± 0% -72.96% (p=0.000 n=10)
Ceil32-8 2.9585n ± 0% 0.8017n ± 0% -72.90% (p=0.000 n=10)
Floor-8 2.9045n ± 0% 0.8014n ± 0% -72.41% (p=0.000 n=10)
Floor32-8 2.9275n ± 0% 0.8014n ± 0% -72.62% (p=0.000 n=10)
RoundToEven-8 2.5815n ± 1% 0.8014n ± 0% -68.96% (p=0.000 n=10)
RoundToEven32-8 2.6795n ± 0% 0.8021n ± 0% -70.06% (p=0.000 n=10)
Trunc-8 2.9220n ± 1% 0.8014n ± 0% -72.57% (p=0.000 n=10)
Trunc32-8 2.9245n ± 0% 0.8016n ± 0% -72.59% (p=0.000 n=10)
geomean 2.854n 0.8016n -71.92%
Change-Id: Ia7f58ea4f6832d3dc44822f28a065d73f077f217
Reviewed-on: https://go-review.googlesource.com/c/go/+/770960
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@google.com>
Reviewed-by: Carlos Amedee <carlos@golang.org>
LUCI-TryBot-Result: golang-scoped@luci-project-accounts.iam.gserviceaccount.com <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Meidan Li <limeidan@loongson.cn>
This commit is contained in:
parent
4e0783368b
commit
3fdac6780b
7 changed files with 133 additions and 3 deletions
|
|
@ -539,7 +539,11 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
|
||||
case ssa.OpLOONG64VPCNT64,
|
||||
ssa.OpLOONG64VPCNT32,
|
||||
ssa.OpLOONG64VPCNT16:
|
||||
ssa.OpLOONG64VPCNT16,
|
||||
ssa.OpLOONG64FRINTND,
|
||||
ssa.OpLOONG64FRINTZD,
|
||||
ssa.OpLOONG64FRINTPD,
|
||||
ssa.OpLOONG64FRINTMD:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = ((v.Args[0].Reg() - loong64.REG_F0) & 31) + loong64.REG_V0
|
||||
|
|
|
|||
|
|
@ -207,10 +207,16 @@
|
|||
(PopCount16 <t> x) => (MOVWfpgp <t> (VPCNT16 <typ.Float32> (MOVWgpfp <typ.Float32> (ZeroExt16to32 x))))
|
||||
|
||||
// math package intrinsics
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt32 ...) => (SQRTF ...)
|
||||
|
||||
(Abs ...) => (ABSD ...)
|
||||
(Copysign ...) => (FCOPYSGD ...)
|
||||
(Copysign ...) => (FCOPYSGD ...)
|
||||
|
||||
(RoundToEven ...) => (FRINTND ...)
|
||||
(Floor ...) => (FRINTMD ...)
|
||||
(Ceil ...) => (FRINTPD ...)
|
||||
(Trunc ...) => (FRINTZD ...)
|
||||
|
||||
(Min(64|32)F ...) => (FMIN(D|F) ...)
|
||||
(Max(64|32)F ...) => (FMAX(D|F) ...)
|
||||
|
|
|
|||
|
|
@ -355,6 +355,12 @@ func init() {
|
|||
{name: "MOVFD", argLength: 1, reg: fp11, asm: "MOVFD"}, // float32 -> float64
|
||||
{name: "MOVDF", argLength: 1, reg: fp11, asm: "MOVDF"}, // float64 -> float32
|
||||
|
||||
// 64-bit floating-point round to integers in 64-bit FP format
|
||||
{name: "FRINTND", argLength: 1, reg: fp11, asm: "VFRINTRNED"}, // Round (ties to even; 0.5 -> 0, 1.5 -> 2)
|
||||
{name: "FRINTMD", argLength: 1, reg: fp11, asm: "VFRINTRMD"}, // Floor (towards Minus; 0.5 -> 0, -0.5 -> -1)
|
||||
{name: "FRINTPD", argLength: 1, reg: fp11, asm: "VFRINTRPD"}, // Ceil (towards Positive; 0.5 -> 1, -0.5 -> 0)
|
||||
{name: "FRINTZD", argLength: 1, reg: fp11, asm: "VFRINTRZD"}, // Trunc (towards Zero; 0.5 -> 0, -0.5 -> 0)
|
||||
|
||||
// Round ops to block fused-multiply-add extraction.
|
||||
{name: "LoweredRound32F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||
{name: "LoweredRound64F", argLength: 1, reg: fp11, resultInArg0: true},
|
||||
|
|
|
|||
|
|
@ -4650,6 +4650,10 @@ const (
|
|||
OpLOONG64TRUNCDV
|
||||
OpLOONG64MOVFD
|
||||
OpLOONG64MOVDF
|
||||
OpLOONG64FRINTND
|
||||
OpLOONG64FRINTMD
|
||||
OpLOONG64FRINTPD
|
||||
OpLOONG64FRINTZD
|
||||
OpLOONG64LoweredRound32F
|
||||
OpLOONG64LoweredRound64F
|
||||
OpLOONG64CALLstatic
|
||||
|
|
@ -72009,6 +72013,58 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FRINTND",
|
||||
argLen: 1,
|
||||
asm: loong64.AVFRINTRNED,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FRINTMD",
|
||||
argLen: 1,
|
||||
asm: loong64.AVFRINTRMD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FRINTPD",
|
||||
argLen: 1,
|
||||
asm: loong64.AVFRINTRPD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FRINTZD",
|
||||
argLen: 1,
|
||||
asm: loong64.AVFRINTRZD,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4611686017353646080}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LoweredRound32F",
|
||||
argLen: 1,
|
||||
|
|
|
|||
|
|
@ -157,6 +157,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
|||
case OpBswap64:
|
||||
v.Op = OpLOONG64REVBV
|
||||
return true
|
||||
case OpCeil:
|
||||
v.Op = OpLOONG64FRINTPD
|
||||
return true
|
||||
case OpClosureCall:
|
||||
v.Op = OpLOONG64CALLclosure
|
||||
return true
|
||||
|
|
@ -286,6 +289,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
|||
case OpFMA:
|
||||
v.Op = OpLOONG64FMADDD
|
||||
return true
|
||||
case OpFloor:
|
||||
v.Op = OpLOONG64FRINTMD
|
||||
return true
|
||||
case OpGetCallerPC:
|
||||
v.Op = OpLOONG64LoweredGetCallerPC
|
||||
return true
|
||||
|
|
@ -696,6 +702,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
|||
case OpRound64F:
|
||||
v.Op = OpLOONG64LoweredRound64F
|
||||
return true
|
||||
case OpRoundToEven:
|
||||
v.Op = OpLOONG64FRINTND
|
||||
return true
|
||||
case OpRsh16Ux16:
|
||||
return rewriteValueLOONG64_OpRsh16Ux16(v)
|
||||
case OpRsh16Ux32:
|
||||
|
|
@ -824,6 +833,9 @@ func rewriteValueLOONG64(v *Value) bool {
|
|||
case OpTailCallInter:
|
||||
v.Op = OpLOONG64CALLtailinter
|
||||
return true
|
||||
case OpTrunc:
|
||||
v.Op = OpLOONG64FRINTZD
|
||||
return true
|
||||
case OpTrunc16to8:
|
||||
v.Op = OpCopy
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -888,6 +888,48 @@ func initIntrinsics(cfg *intrinsicBuildConfig) {
|
|||
makeRoundAMD64(ssa.OpTrunc),
|
||||
sys.AMD64)
|
||||
|
||||
// makeRoundLoong64 returns an intrinsic builder for the float64 rounding op `op`.
// The generated code tests the Loong64HasLSX flag at run time: when it is set,
// `op` is applied directly to args[0]; otherwise the call n is emitted as a
// normal call. Both paths store their result in s.vars[n], which is read back
// in the merge block.
makeRoundLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
// Load the boolean Loong64HasLSX flag and branch on it.
addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
|
||||
v := s.load(types.Types[types.TBOOL], addr)
|
||||
b := s.endBlock()
|
||||
b.Kind = ssa.BlockIf
|
||||
b.SetControl(v)
|
||||
bTrue := s.f.NewBlock(ssa.BlockPlain)
|
||||
bFalse := s.f.NewBlock(ssa.BlockPlain)
|
||||
bEnd := s.f.NewBlock(ssa.BlockPlain)
|
||||
b.AddEdgeTo(bTrue)
|
||||
b.AddEdgeTo(bFalse)
|
||||
b.Likely = ssa.BranchLikely // most loong64 machines support LSX
|
||||
|
||||
// We have the intrinsic - use it directly.
|
||||
s.startBlock(bTrue)
|
||||
s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Call the pure Go version.
|
||||
s.startBlock(bFalse)
|
||||
s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
|
||||
s.endBlock().AddEdgeTo(bEnd)
|
||||
|
||||
// Merge results.
|
||||
s.startBlock(bEnd)
|
||||
return s.variable(n, types.Types[types.TFLOAT64])
|
||||
}
|
||||
}
|
||||
addF("math", "RoundToEven",
|
||||
makeRoundLoong64(ssa.OpRoundToEven),
|
||||
sys.Loong64)
|
||||
addF("math", "Floor",
|
||||
makeRoundLoong64(ssa.OpFloor),
|
||||
sys.Loong64)
|
||||
addF("math", "Ceil",
|
||||
makeRoundLoong64(ssa.OpCeil),
|
||||
sys.Loong64)
|
||||
addF("math", "Trunc",
|
||||
makeRoundLoong64(ssa.OpTrunc),
|
||||
sys.Loong64)
|
||||
|
||||
/******** math/bits ********/
|
||||
addF("math/bits", "TrailingZeros64",
|
||||
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
|
||||
|
|
|
|||
|
|
@ -432,6 +432,10 @@ var wantIntrinsics = map[testIntrinsicKey]struct{}{
|
|||
{"loong64", "math", "Copysign"}: struct{}{},
|
||||
{"loong64", "math", "FMA"}: struct{}{},
|
||||
{"loong64", "math", "sqrt"}: struct{}{},
|
||||
{"loong64", "math", "Ceil"}: struct{}{},
|
||||
{"loong64", "math", "Floor"}: struct{}{},
|
||||
{"loong64", "math", "RoundToEven"}: struct{}{},
|
||||
{"loong64", "math", "Trunc"}: struct{}{},
|
||||
{"loong64", "math/big", "mulWW"}: struct{}{},
|
||||
{"loong64", "math/bits", "Add"}: struct{}{},
|
||||
{"loong64", "math/bits", "Add64"}: struct{}{},
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue