mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/compile: optimize single-precision floating point square root
Add a generic rule that rewrites the single-precision square root expression into a single single-precision instruction. The optimization eliminates two precision conversions between double precision and single precision. On the arm64 platform, previous: FCVTSD F0, F0 FSQRTD F0, F0 FCVTDS F0, F0 optimized: FSQRTS S0, S0 This patch also adds a test case to check the correctness. This patch refers to CL 241877, contributed by Alice Xu (dianhong.xu@arm.com) Change-Id: I6de5d02281c693017ac4bd4c10963dd55989bd7e Reviewed-on: https://go-review.googlesource.com/c/go/+/276873 Trust: fannie zhang <Fannie.Zhang@arm.com> Run-TryBot: fannie zhang <Fannie.Zhang@arm.com> TryBot-Result: Go Bot <gobot@golang.org> Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
ebb92dfed9
commit
2b50ab2aee
41 changed files with 255 additions and 28 deletions
|
|
@ -1053,7 +1053,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg0()
|
||||
|
||||
case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
|
||||
case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
|
|
@ -1061,7 +1061,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
switch v.Op {
|
||||
case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
|
||||
p.To.Reg = v.Reg0()
|
||||
case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
|
||||
case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
|
||||
p.To.Reg = v.Reg()
|
||||
}
|
||||
case ssa.OpAMD64ROUNDSD:
|
||||
|
|
|
|||
|
|
@ -654,6 +654,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpARMREV,
|
||||
ssa.OpARMREV16,
|
||||
ssa.OpARMRBIT,
|
||||
ssa.OpARMSQRTF,
|
||||
ssa.OpARMSQRTD,
|
||||
ssa.OpARMNEGF,
|
||||
ssa.OpARMNEGD,
|
||||
|
|
|
|||
|
|
@ -893,6 +893,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpARM64FMOVSgpfp,
|
||||
ssa.OpARM64FNEGS,
|
||||
ssa.OpARM64FNEGD,
|
||||
ssa.OpARM64FSQRTS,
|
||||
ssa.OpARM64FSQRTD,
|
||||
ssa.OpARM64FCVTZSSW,
|
||||
ssa.OpARM64FCVTZSDW,
|
||||
|
|
|
|||
|
|
@ -363,6 +363,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpMIPSMOVDF,
|
||||
ssa.OpMIPSNEGF,
|
||||
ssa.OpMIPSNEGD,
|
||||
ssa.OpMIPSSQRTF,
|
||||
ssa.OpMIPSSQRTD,
|
||||
ssa.OpMIPSCLZ:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
|
|
|
|||
|
|
@ -355,6 +355,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
ssa.OpMIPS64MOVDF,
|
||||
ssa.OpMIPS64NEGF,
|
||||
ssa.OpMIPS64NEGD,
|
||||
ssa.OpMIPS64SQRTF,
|
||||
ssa.OpMIPS64SQRTD:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
|
|
|
|||
|
|
@ -586,7 +586,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.Reg = v.Args[1].Reg()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = v.Reg()
|
||||
case ssa.OpS390XFSQRT:
|
||||
case ssa.OpS390XFSQRTS, ssa.OpS390XFSQRT:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@
|
|||
(Bswap32 ...) => (BSWAPL ...)
|
||||
|
||||
(Sqrt ...) => (SQRTSD ...)
|
||||
(Sqrt32 ...) => (SQRTSS ...)
|
||||
|
||||
(Ctz16 x) => (BSFL (ORLconst <typ.UInt32> [0x10000] x))
|
||||
(Ctz16NonZero ...) => (BSFL ...)
|
||||
|
|
|
|||
|
|
@ -308,6 +308,7 @@ func init() {
|
|||
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true, clobberFlags: true}, // arg0 swap bytes
|
||||
|
||||
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
|
||||
{name: "SQRTSS", argLength: 1, reg: fp11, asm: "SQRTSS"}, // sqrt(arg0), float32
|
||||
|
||||
{name: "SBBLcarrymask", argLength: 1, reg: flagsgp, asm: "SBBL"}, // (int32)(-1) if carry is set, 0 if carry is clear.
|
||||
// Note: SBBW and SBBB are subsumed by SBBL
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@
|
|||
(PopCount8 x) => (POPCNTL (MOVBQZX <typ.UInt32> x))
|
||||
|
||||
(Sqrt ...) => (SQRTSD ...)
|
||||
(Sqrt32 ...) => (SQRTSS ...)
|
||||
|
||||
(RoundToEven x) => (ROUNDSD [0] x)
|
||||
(Floor x) => (ROUNDSD [1] x)
|
||||
|
|
|
|||
|
|
@ -594,6 +594,7 @@ func init() {
|
|||
{name: "POPCNTL", argLength: 1, reg: gp11, asm: "POPCNTL", clobberFlags: true}, // count number of set bits in arg0
|
||||
|
||||
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
|
||||
{name: "SQRTSS", argLength: 1, reg: fp11, asm: "SQRTSS"}, // sqrt(arg0), float32
|
||||
|
||||
// ROUNDSD instruction isn't guaranteed to be on the target platform (it is SSE4.1)
|
||||
// Any use must be preceded by a successful check of runtime.x86HasSSE41.
|
||||
|
|
|
|||
|
|
@ -56,6 +56,7 @@
|
|||
(Com(32|16|8) ...) => (MVN ...)
|
||||
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt32 ...) => (SQRTF ...)
|
||||
(Abs ...) => (ABSD ...)
|
||||
|
||||
// TODO: optimize this for ARMv5 and ARMv6
|
||||
|
|
|
|||
|
|
@ -60,6 +60,8 @@
|
|||
(Trunc ...) => (FRINTZD ...)
|
||||
(FMA x y z) => (FMADDD z x y)
|
||||
|
||||
(Sqrt32 ...) => (FSQRTS ...)
|
||||
|
||||
// lowering rotates
|
||||
(RotateLeft8 <t> x (MOVDconst [c])) => (Or8 (Lsh8x64 <t> x (MOVDconst [c&7])) (Rsh8Ux64 <t> x (MOVDconst [-c&7])))
|
||||
(RotateLeft16 <t> x (MOVDconst [c])) => (Or16 (Lsh16x64 <t> x (MOVDconst [c&15])) (Rsh16Ux64 <t> x (MOVDconst [-c&15])))
|
||||
|
|
|
|||
|
|
@ -236,6 +236,7 @@ func init() {
|
|||
{name: "FNEGS", argLength: 1, reg: fp11, asm: "FNEGS"}, // -arg0, float32
|
||||
{name: "FNEGD", argLength: 1, reg: fp11, asm: "FNEGD"}, // -arg0, float64
|
||||
{name: "FSQRTD", argLength: 1, reg: fp11, asm: "FSQRTD"}, // sqrt(arg0), float64
|
||||
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
|
||||
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit
|
||||
{name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit
|
||||
{name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit
|
||||
|
|
|
|||
|
|
@ -217,6 +217,7 @@ func init() {
|
|||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
{name: "ABSD", argLength: 1, reg: fp11, asm: "ABSD"}, // abs(arg0), float64
|
||||
|
||||
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@
|
|||
(Com(32|16|8) x) => (NORconst [0] x)
|
||||
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt32 ...) => (SQRTF ...)
|
||||
|
||||
// TODO: optimize this case?
|
||||
(Ctz32NonZero ...) => (Ctz32 ...)
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@
|
|||
(Com(64|32|16|8) x) => (NOR (MOVVconst [0]) x)
|
||||
|
||||
(Sqrt ...) => (SQRTD ...)
|
||||
(Sqrt32 ...) => (SQRTF ...)
|
||||
|
||||
// boolean ops -- booleans are represented with 0=false, 1=true
|
||||
(AndB ...) => (AND ...)
|
||||
|
|
|
|||
|
|
@ -199,6 +199,7 @@ func init() {
|
|||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
|
||||
// shifts
|
||||
{name: "SLLV", argLength: 2, reg: gp21, asm: "SLLV"}, // arg0 << arg1, shift amount is mod 64
|
||||
|
|
|
|||
|
|
@ -182,6 +182,7 @@ func init() {
|
|||
{name: "NEGF", argLength: 1, reg: fp11, asm: "NEGF"}, // -arg0, float32
|
||||
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
|
||||
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
|
||||
{name: "SQRTF", argLength: 1, reg: fp11, asm: "SQRTF"}, // sqrt(arg0), float32
|
||||
|
||||
// shifts
|
||||
{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 32
|
||||
|
|
|
|||
|
|
@ -71,6 +71,7 @@
|
|||
(Round(32|64)F ...) => (LoweredRound(32|64)F ...)
|
||||
|
||||
(Sqrt ...) => (FSQRT ...)
|
||||
(Sqrt32 ...) => (FSQRTS ...)
|
||||
(Floor ...) => (FFLOOR ...)
|
||||
(Ceil ...) => (FCEIL ...)
|
||||
(Trunc ...) => (FTRUNC ...)
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@
|
|||
(Com8 ...) => (NOT ...)
|
||||
|
||||
(Sqrt ...) => (FSQRTD ...)
|
||||
(Sqrt32 ...) => (FSQRTS ...)
|
||||
|
||||
// Sign and zero extension.
|
||||
|
||||
|
|
|
|||
|
|
@ -142,6 +142,8 @@
|
|||
(Round x) => (FIDBR [1] x)
|
||||
(FMA x y z) => (FMADD z x y)
|
||||
|
||||
(Sqrt32 ...) => (FSQRTS ...)
|
||||
|
||||
// Atomic loads and stores.
|
||||
// The SYNC instruction (fast-BCR-serialization) prevents store-load
|
||||
// reordering. Other sequences of memory operations (load-load,
|
||||
|
|
|
|||
|
|
@ -382,6 +382,7 @@ func init() {
|
|||
{name: "NOTW", argLength: 1, reg: gp11, resultInArg0: true, clobberFlags: true}, // ^arg0
|
||||
|
||||
{name: "FSQRT", argLength: 1, reg: fp11, asm: "FSQRT"}, // sqrt(arg0)
|
||||
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
|
||||
|
||||
// Conditional register-register moves.
|
||||
// The aux for these values is an s390x.CCMask value representing the condition code mask.
|
||||
|
|
|
|||
|
|
@ -332,6 +332,8 @@
|
|||
(Abs ...) => (F64Abs ...)
|
||||
(Copysign ...) => (F64Copysign ...)
|
||||
|
||||
(Sqrt32 ...) => (F32Sqrt ...)
|
||||
|
||||
(Ctz64 ...) => (I64Ctz ...)
|
||||
(Ctz32 x) => (I64Ctz (I64Or x (I64Const [0x100000000])))
|
||||
(Ctz16 x) => (I64Ctz (I64Or x (I64Const [0x10000])))
|
||||
|
|
|
|||
|
|
@ -238,13 +238,13 @@ func init() {
|
|||
{name: "I64Extend16S", asm: "I64Extend16S", argLength: 1, reg: gp11, typ: "Int64"}, // sign-extend arg0 from 16 to 64 bit
|
||||
{name: "I64Extend32S", asm: "I64Extend32S", argLength: 1, reg: gp11, typ: "Int64"}, // sign-extend arg0 from 32 to 64 bit
|
||||
|
||||
{name: "F32Sqrt", asm: "F32Sqrt", argLength: 1, reg: fp64_11, typ: "Float32"}, // sqrt(arg0)
|
||||
{name: "F32Trunc", asm: "F32Trunc", argLength: 1, reg: fp64_11, typ: "Float32"}, // trunc(arg0)
|
||||
{name: "F32Ceil", asm: "F32Ceil", argLength: 1, reg: fp64_11, typ: "Float32"}, // ceil(arg0)
|
||||
{name: "F32Floor", asm: "F32Floor", argLength: 1, reg: fp64_11, typ: "Float32"}, // floor(arg0)
|
||||
{name: "F32Nearest", asm: "F32Nearest", argLength: 1, reg: fp64_11, typ: "Float32"}, // round(arg0)
|
||||
{name: "F32Abs", asm: "F32Abs", argLength: 1, reg: fp64_11, typ: "Float32"}, // abs(arg0)
|
||||
{name: "F32Copysign", asm: "F32Copysign", argLength: 2, reg: fp64_21, typ: "Float32"}, // copysign(arg0, arg1)
|
||||
{name: "F32Sqrt", asm: "F32Sqrt", argLength: 1, reg: fp32_11, typ: "Float32"}, // sqrt(arg0)
|
||||
{name: "F32Trunc", asm: "F32Trunc", argLength: 1, reg: fp32_11, typ: "Float32"}, // trunc(arg0)
|
||||
{name: "F32Ceil", asm: "F32Ceil", argLength: 1, reg: fp32_11, typ: "Float32"}, // ceil(arg0)
|
||||
{name: "F32Floor", asm: "F32Floor", argLength: 1, reg: fp32_11, typ: "Float32"}, // floor(arg0)
|
||||
{name: "F32Nearest", asm: "F32Nearest", argLength: 1, reg: fp32_11, typ: "Float32"}, // round(arg0)
|
||||
{name: "F32Abs", asm: "F32Abs", argLength: 1, reg: fp32_11, typ: "Float32"}, // abs(arg0)
|
||||
{name: "F32Copysign", asm: "F32Copysign", argLength: 2, reg: fp32_21, typ: "Float32"}, // copysign(arg0, arg1)
|
||||
|
||||
{name: "F64Sqrt", asm: "F64Sqrt", argLength: 1, reg: fp64_11, typ: "Float64"}, // sqrt(arg0)
|
||||
{name: "F64Trunc", asm: "F64Trunc", argLength: 1, reg: fp64_11, typ: "Float64"}, // trunc(arg0)
|
||||
|
|
|
|||
|
|
@ -1968,6 +1968,9 @@
|
|||
(Div32F x (Const32F <t> [c])) && reciprocalExact32(c) => (Mul32F x (Const32F <t> [1/c]))
|
||||
(Div64F x (Const64F <t> [c])) && reciprocalExact64(c) => (Mul64F x (Const64F <t> [1/c]))
|
||||
|
||||
// rewrite single-precision sqrt expression "float32(math.Sqrt(float64(x)))"
|
||||
(Cvt64Fto32F sqrt0:(Sqrt (Cvt32Fto64F x))) && sqrt0.Uses==1 => (Sqrt32 x)
|
||||
|
||||
(Sqrt (Const64F [c])) && !math.IsNaN(math.Sqrt(c)) => (Const64F [math.Sqrt(c)])
|
||||
|
||||
// for rewriting results of some late-expanded rewrites (below)
|
||||
|
|
|
|||
|
|
@ -258,13 +258,14 @@ var genericOps = []opData{
|
|||
{name: "RotateLeft32", argLength: 2}, // Rotate bits in arg[0] left by arg[1]
|
||||
{name: "RotateLeft64", argLength: 2}, // Rotate bits in arg[0] left by arg[1]
|
||||
|
||||
// Square root, float64 only.
|
||||
// Square root.
|
||||
// Special cases:
|
||||
// +∞ → +∞
|
||||
// ±0 → ±0 (sign preserved)
|
||||
// x<0 → NaN
|
||||
// NaN → NaN
|
||||
{name: "Sqrt", argLength: 1}, // √arg0
|
||||
{name: "Sqrt", argLength: 1}, // √arg0 (floating point, double precision)
|
||||
{name: "Sqrt32", argLength: 1}, // √arg0 (floating point, single precision)
|
||||
|
||||
// Round to integer, float64 only.
|
||||
// Special cases:
|
||||
|
|
|
|||
|
|
@ -432,6 +432,7 @@ const (
|
|||
Op386BSRW
|
||||
Op386BSWAPL
|
||||
Op386SQRTSD
|
||||
Op386SQRTSS
|
||||
Op386SBBLcarrymask
|
||||
Op386SETEQ
|
||||
Op386SETNE
|
||||
|
|
@ -888,6 +889,7 @@ const (
|
|||
OpAMD64POPCNTQ
|
||||
OpAMD64POPCNTL
|
||||
OpAMD64SQRTSD
|
||||
OpAMD64SQRTSS
|
||||
OpAMD64ROUNDSD
|
||||
OpAMD64VFMADD231SD
|
||||
OpAMD64SBBQcarrymask
|
||||
|
|
@ -1090,6 +1092,7 @@ const (
|
|||
OpARMNEGF
|
||||
OpARMNEGD
|
||||
OpARMSQRTD
|
||||
OpARMSQRTF
|
||||
OpARMABSD
|
||||
OpARMCLZ
|
||||
OpARMREV
|
||||
|
|
@ -1358,6 +1361,7 @@ const (
|
|||
OpARM64FNEGS
|
||||
OpARM64FNEGD
|
||||
OpARM64FSQRTD
|
||||
OpARM64FSQRTS
|
||||
OpARM64REV
|
||||
OpARM64REVW
|
||||
OpARM64REV16W
|
||||
|
|
@ -1641,6 +1645,7 @@ const (
|
|||
OpMIPSNEGF
|
||||
OpMIPSNEGD
|
||||
OpMIPSSQRTD
|
||||
OpMIPSSQRTF
|
||||
OpMIPSSLL
|
||||
OpMIPSSLLconst
|
||||
OpMIPSSRL
|
||||
|
|
@ -1751,6 +1756,7 @@ const (
|
|||
OpMIPS64NEGF
|
||||
OpMIPS64NEGD
|
||||
OpMIPS64SQRTD
|
||||
OpMIPS64SQRTF
|
||||
OpMIPS64SLLV
|
||||
OpMIPS64SLLVconst
|
||||
OpMIPS64SRLV
|
||||
|
|
@ -2301,6 +2307,7 @@ const (
|
|||
OpS390XNOT
|
||||
OpS390XNOTW
|
||||
OpS390XFSQRT
|
||||
OpS390XFSQRTS
|
||||
OpS390XLOCGR
|
||||
OpS390XMOVBreg
|
||||
OpS390XMOVBZreg
|
||||
|
|
@ -2727,6 +2734,7 @@ const (
|
|||
OpRotateLeft32
|
||||
OpRotateLeft64
|
||||
OpSqrt
|
||||
OpSqrt32
|
||||
OpFloor
|
||||
OpCeil
|
||||
OpTrunc
|
||||
|
|
@ -4778,6 +4786,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTSS",
|
||||
argLen: 1,
|
||||
asm: x86.ASQRTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 65280}, // X0 X1 X2 X3 X4 X5 X6 X7
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SBBLcarrymask",
|
||||
argLen: 1,
|
||||
|
|
@ -11630,6 +11651,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTSS",
|
||||
argLen: 1,
|
||||
asm: x86.ASQRTSS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 2147418112}, // X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13 X14
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ROUNDSD",
|
||||
auxType: auxInt8,
|
||||
|
|
@ -14424,6 +14458,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTF",
|
||||
argLen: 1,
|
||||
asm: arm.ASQRTF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "ABSD",
|
||||
argLen: 1,
|
||||
|
|
@ -18086,6 +18133,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FSQRTS",
|
||||
argLen: 1,
|
||||
asm: arm64.AFSQRTS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 9223372034707292160}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "REV",
|
||||
argLen: 1,
|
||||
|
|
@ -21879,6 +21939,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTF",
|
||||
argLen: 1,
|
||||
asm: mips.ASQRTF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 35183835217920}, // F0 F2 F4 F6 F8 F10 F12 F14 F16 F18 F20 F22 F24 F26 F28 F30
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SLL",
|
||||
argLen: 2,
|
||||
|
|
@ -23358,6 +23431,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTF",
|
||||
argLen: 1,
|
||||
asm: mips.ASQRTF,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 1152921504338411520}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 1152921504338411520}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15 F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SLLV",
|
||||
argLen: 2,
|
||||
|
|
@ -30942,6 +31028,19 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "FSQRTS",
|
||||
argLen: 1,
|
||||
asm: s390x.AFSQRTS,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "LOCGR",
|
||||
auxType: auxS390XCCMask,
|
||||
|
|
@ -33876,10 +33975,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Sqrt,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33889,10 +33988,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Trunc,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33902,10 +34001,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Ceil,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33915,10 +34014,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Floor,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33928,10 +34027,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Nearest,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33941,10 +34040,10 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Abs,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -33954,11 +34053,11 @@ var opcodeTable = [...]opInfo{
|
|||
asm: wasm.AF32Copysign,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{1, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
outputs: []outputInfo{
|
||||
{0, 281470681743360}, // F16 F17 F18 F19 F20 F21 F22 F23 F24 F25 F26 F27 F28 F29 F30 F31
|
||||
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||
},
|
||||
},
|
||||
},
|
||||
|
|
@ -35176,6 +35275,11 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Sqrt32",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Floor",
|
||||
argLen: 1,
|
||||
|
|
|
|||
|
|
@ -620,6 +620,9 @@ func rewriteValue386(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = Op386SQRTSD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = Op386SQRTSS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = Op386CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -1089,6 +1089,9 @@ func rewriteValueAMD64(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpAMD64SQRTSD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpAMD64SQRTSS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpAMD64CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -823,6 +823,9 @@ func rewriteValueARM(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpARMSQRTD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpARMSQRTF
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpARMCALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -999,6 +999,9 @@ func rewriteValueARM64(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpARM64FSQRTD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpARM64FSQRTS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpARM64CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -516,6 +516,9 @@ func rewriteValueMIPS(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpMIPSSQRTD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpMIPSSQRTF
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpMIPSCALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -596,6 +596,9 @@ func rewriteValueMIPS64(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpMIPS64SQRTD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpMIPS64SQRTF
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpMIPS64CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -743,6 +743,9 @@ func rewriteValuePPC64(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpPPC64FSQRT
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpPPC64FSQRTS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpPPC64CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -582,6 +582,9 @@ func rewriteValueRISCV64(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpRISCV64FSQRTD
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpRISCV64FSQRTS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpRISCV64CALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -792,6 +792,9 @@ func rewriteValueS390X(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpS390XFSQRT
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpS390XFSQRTS
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpS390XCALLstatic
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -527,6 +527,9 @@ func rewriteValueWasm(v *Value) bool {
|
|||
case OpSqrt:
|
||||
v.Op = OpWasmF64Sqrt
|
||||
return true
|
||||
case OpSqrt32:
|
||||
v.Op = OpWasmF32Sqrt
|
||||
return true
|
||||
case OpStaticCall:
|
||||
v.Op = OpWasmLoweredStaticCall
|
||||
return true
|
||||
|
|
|
|||
|
|
@ -4085,6 +4085,26 @@ func rewriteValuegeneric_OpCvt64Fto32F(v *Value) bool {
|
|||
v.AuxInt = float32ToAuxInt(float32(c))
|
||||
return true
|
||||
}
|
||||
// match: (Cvt64Fto32F sqrt0:(Sqrt (Cvt32Fto64F x)))
|
||||
// cond: sqrt0.Uses==1
|
||||
// result: (Sqrt32 x)
|
||||
for {
|
||||
sqrt0 := v_0
|
||||
if sqrt0.Op != OpSqrt {
|
||||
break
|
||||
}
|
||||
sqrt0_0 := sqrt0.Args[0]
|
||||
if sqrt0_0.Op != OpCvt32Fto64F {
|
||||
break
|
||||
}
|
||||
x := sqrt0_0.Args[0]
|
||||
if !(sqrt0.Uses == 1) {
|
||||
break
|
||||
}
|
||||
v.reset(OpSqrt32)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValuegeneric_OpCvt64Fto64(v *Value) bool {
|
||||
|
|
|
|||
|
|
@ -760,7 +760,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
|
|||
p.To.Reg = v.Reg()
|
||||
case ssa.Op386BSFL, ssa.Op386BSFW,
|
||||
ssa.Op386BSRL, ssa.Op386BSRW,
|
||||
ssa.Op386SQRTSD:
|
||||
ssa.Op386SQRTSS, ssa.Op386SQRTSD:
|
||||
p := s.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = v.Args[0].Reg()
|
||||
|
|
|
|||
|
|
@ -2067,6 +2067,21 @@ var fmaC = []struct{ x, y, z, want float64 }{
|
|||
{-7.751454006381804e-05, 5.588653777189071e-308, -2.2207280111272877e-308, -2.2211612130544025e-308},
|
||||
}
|
||||
|
||||
// sqrt32 lists the float32 inputs that TestFloat32Sqrt iterates over:
// positive and negative zero, NaN, both infinities, and a handful of
// positive and negative finite values.
var sqrt32 = []float32{
|
||||
0,
|
||||
float32(Copysign(0, -1)),
|
||||
float32(NaN()),
|
||||
float32(Inf(1)),
|
||||
float32(Inf(-1)),
|
||||
1,
|
||||
2,
|
||||
-2,
|
||||
4.9790119248836735e+00,
|
||||
7.7388724745781045e+00,
|
||||
-2.7688005719200159e-01,
|
||||
-5.0106036182710749e+00,
|
||||
}
|
||||
|
||||
func tolerance(a, b, e float64) bool {
|
||||
// Multiplying by e here can underflow denormal values to zero.
|
||||
// Check a==b so that at least if a and b are small and identical
|
||||
|
|
@ -3181,6 +3196,25 @@ func TestFloatMinMax(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// indirectSqrt holds Sqrt in a package-level variable. TestFloat32Sqrt calls
// Sqrt through it to compute the reference value.
// NOTE(review): presumably the indirection keeps the compiler from applying
// the single-precision sqrt rewrite to the reference computation — confirm.
var indirectSqrt = Sqrt
|
||||
|
||||
// TestFloat32Sqrt checks the correctness of the float32 square root optimization result.
// For every input in sqrt32 it compares float32(Sqrt(float64(v))), written
// directly, against the same computation made through indirectSqrt, and
// reports an error when the two results disagree.
|
||||
func TestFloat32Sqrt(t *testing.T) {
|
||||
for _, v := range sqrt32 {
|
||||
want := float32(indirectSqrt(float64(v)))
|
||||
got := float32(Sqrt(float64(v)))
|
||||
// NaN never compares equal to itself, so a NaN reference value is
// matched with IsNaN instead of the != comparison below.
if IsNaN(float64(want)) {
|
||||
if !IsNaN(float64(got)) {
|
||||
t.Errorf("got=%#v want=NaN, v=%#v", got, v)
|
||||
}
|
||||
continue
|
||||
}
|
||||
if got != want {
|
||||
t.Errorf("got=%#v want=%#v, v=%#v", got, want, v)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Benchmarks
|
||||
|
||||
// Global exported variables are used to store the
|
||||
|
|
|
|||
|
|
@ -55,6 +55,17 @@ func sqrt(x float64) float64 {
|
|||
return math.Sqrt(x)
|
||||
}
|
||||
|
||||
// sqrt32 returns the square root of x, computed as
// float32(math.Sqrt(float64(x))).
// NOTE(review): the per-architecture comments in the body look like
// assembly-check directives naming the instruction expected (or, with a
// leading '-', not expected) in the generated code; keep them verbatim.
func sqrt32(x float32) float32 {
|
||||
// amd64:"SQRTSS"
|
||||
// 386/sse2:"SQRTSS" 386/softfloat:-"SQRTS"
|
||||
// arm64:"FSQRTS"
|
||||
// arm/7:"SQRTF"
|
||||
// mips/hardfloat:"SQRTF" mips/softfloat:-"SQRTF"
|
||||
// mips64/hardfloat:"SQRTF" mips64/softfloat:-"SQRTF"
|
||||
// wasm:"F32Sqrt"
|
||||
return float32(math.Sqrt(float64(x)))
|
||||
}
|
||||
|
||||
// Check that it's using integer registers
|
||||
func abs(x, y float64) {
|
||||
// amd64:"BTRQ\t[$]63"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue