mirror of
https://github.com/golang/go.git
synced 2025-12-08 06:10:04 +00:00
cmd/asm, cmd/compile: optimize math.Abs and math.Copysign on s390x
This change adds three new instructions:

- LPDFR: load positive (math.Abs(x))
- LNDFR: load negative (-math.Abs(x))
- CPSDR: copy sign (math.Copysign(x, y))

By making use of GPR <-> FPR moves we can now compile math.Abs and math.Copysign to these instructions using SSA rules.

This CL also adds new rules to merge address generation into combined load operations. This makes GPR <-> FPR move matching more reliable.

name      old time/op  new time/op  delta
Copysign  1.85ns ± 0%  1.40ns ± 1%  -24.65%  (p=0.000 n=8+10)
Abs       1.58ns ± 1%  0.73ns ± 1%  -53.64%  (p=0.000 n=10+10)

The geo mean improvement for all math package benchmarks was 4.6%.

Change-Id: I0cec35c5c1b3fb45243bf666b56b57faca981bc9
Reviewed-on: https://go-review.googlesource.com/73950
Run-TryBot: Michael Munday <mike.munday@ibm.com>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
parent
7fff1db060
commit
96cdacb971
10 changed files with 2035 additions and 134 deletions
3
src/cmd/asm/internal/asm/testdata/s390x.s
vendored
3
src/cmd/asm/internal/asm/testdata/s390x.s
vendored
|
|
@ -296,6 +296,9 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
|
||||||
FMADDS F1, F2, F3 // b30e3012
|
FMADDS F1, F2, F3 // b30e3012
|
||||||
FMSUB F4, F5, F5 // b31f5045
|
FMSUB F4, F5, F5 // b31f5045
|
||||||
FMSUBS F6, F6, F7 // b30f7066
|
FMSUBS F6, F6, F7 // b30f7066
|
||||||
|
LPDFR F1, F2 // b3700021
|
||||||
|
LNDFR F3, F4 // b3710043
|
||||||
|
CPSDR F5, F6, F7 // b3725076
|
||||||
|
|
||||||
VL (R15), V1 // e710f0000006
|
VL (R15), V1 // e710f0000006
|
||||||
VST V1, (R15) // e710f000000e
|
VST V1, (R15) // e710f000000e
|
||||||
|
|
|
||||||
|
|
@ -1691,6 +1691,70 @@ var linuxS390XTests = []*asmTest{
|
||||||
pos: []string{"\tMOV(B|BZ|D)\t[$]1,"},
|
pos: []string{"\tMOV(B|BZ|D)\t[$]1,"},
|
||||||
neg: []string{"\tCEBR\t", "\tMOV(B|BZ|D)\t[$]0,"},
|
neg: []string{"\tCEBR\t", "\tMOV(B|BZ|D)\t[$]0,"},
|
||||||
},
|
},
|
||||||
|
// math tests
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return math.Abs(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tLPDFR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float32) float32 {
|
||||||
|
return float32(math.Abs(float64(x)))
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tLPDFR\t"},
|
||||||
|
neg: []string{"\tLDEBR\t", "\tLEDBR\t"}, // no float64 conversion
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return math.Float64frombits(math.Float64bits(x)|1<<63)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tLNDFR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return -math.Abs(x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tLNDFR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x, y float64) float64 {
|
||||||
|
return math.Copysign(x, y)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tCPSDR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return math.Copysign(x, -1)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tLNDFR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
|
{
|
||||||
|
fn: `
|
||||||
|
func $(x float64) float64 {
|
||||||
|
return math.Copysign(-1, x)
|
||||||
|
}
|
||||||
|
`,
|
||||||
|
pos: []string{"\tCPSDR\t"},
|
||||||
|
neg: []string{"\tMOVD\t"}, // no integer loads/stores
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
var linuxARMTests = []*asmTest{
|
var linuxARMTests = []*asmTest{
|
||||||
|
|
|
||||||
|
|
@ -214,6 +214,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
default:
|
default:
|
||||||
v.Fatalf("invalid FIDBR mask: %v", v.AuxInt)
|
v.Fatalf("invalid FIDBR mask: %v", v.AuxInt)
|
||||||
}
|
}
|
||||||
|
case ssa.OpS390XCPSDR:
|
||||||
|
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
|
||||||
|
p.Reg = v.Args[0].Reg()
|
||||||
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
|
case ssa.OpS390XDIVD, ssa.OpS390XDIVW,
|
||||||
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
|
ssa.OpS390XDIVDU, ssa.OpS390XDIVWU,
|
||||||
ssa.OpS390XMODD, ssa.OpS390XMODW,
|
ssa.OpS390XMODD, ssa.OpS390XMODW,
|
||||||
|
|
@ -432,10 +435,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
||||||
gc.AddAux2(&p.To, v, sc.Off())
|
gc.AddAux2(&p.To, v, sc.Off())
|
||||||
case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg,
|
case ssa.OpS390XMOVBreg, ssa.OpS390XMOVHreg, ssa.OpS390XMOVWreg,
|
||||||
ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg,
|
ssa.OpS390XMOVBZreg, ssa.OpS390XMOVHZreg, ssa.OpS390XMOVWZreg,
|
||||||
|
ssa.OpS390XLDGR, ssa.OpS390XLGDR,
|
||||||
ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA,
|
ssa.OpS390XCEFBRA, ssa.OpS390XCDFBRA, ssa.OpS390XCEGBRA, ssa.OpS390XCDGBRA,
|
||||||
ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA,
|
ssa.OpS390XCFEBRA, ssa.OpS390XCFDBRA, ssa.OpS390XCGEBRA, ssa.OpS390XCGDBRA,
|
||||||
ssa.OpS390XLDEBR, ssa.OpS390XLEDBR,
|
ssa.OpS390XLDEBR, ssa.OpS390XLEDBR,
|
||||||
ssa.OpS390XFNEG, ssa.OpS390XFNEGS:
|
ssa.OpS390XFNEG, ssa.OpS390XFNEGS,
|
||||||
|
ssa.OpS390XLPDFR, ssa.OpS390XLNDFR:
|
||||||
opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
|
opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
|
||||||
case ssa.OpS390XCLEAR:
|
case ssa.OpS390XCLEAR:
|
||||||
p := s.Prog(v.Op.Asm())
|
p := s.Prog(v.Op.Asm())
|
||||||
|
|
|
||||||
|
|
@ -688,10 +688,55 @@
|
||||||
(MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
|
(MOVWZreg x:(MOVWZloadidx [off] {sym} ptr idx mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVWZloadidx <v.Type> [off] {sym} ptr idx mem)
|
||||||
|
|
||||||
// replace load from same location as preceding store with copy
|
// replace load from same location as preceding store with copy
|
||||||
(MOVBZload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBZreg x)
|
(MOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVDreg x)
|
||||||
(MOVHZload [off] {sym} ptr (MOVHstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVHZreg x)
|
(MOVWload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWreg x)
|
||||||
(MOVWZload [off] {sym} ptr (MOVWstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVWZreg x)
|
(MOVHload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHreg x)
|
||||||
(MOVDload [off] {sym} ptr (MOVDstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDreg x)
|
(MOVBload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBreg x)
|
||||||
|
(MOVWZload [off] {sym} ptr1 (MOVWstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVWZreg x)
|
||||||
|
(MOVHZload [off] {sym} ptr1 (MOVHstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVHZreg x)
|
||||||
|
(MOVBZload [off] {sym} ptr1 (MOVBstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (MOVBZreg x)
|
||||||
|
(MOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LGDR x)
|
||||||
|
(FMOVDload [off] {sym} ptr1 (MOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> (LDGR x)
|
||||||
|
(FMOVDload [off] {sym} ptr1 (FMOVDstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
|
||||||
|
(FMOVSload [off] {sym} ptr1 (FMOVSstore [off] {sym} ptr2 x _)) && isSamePtr(ptr1, ptr2) -> x
|
||||||
|
|
||||||
|
// prefer FPR <-> GPR moves over combined load ops
|
||||||
|
(MULLDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (MULLD x (LGDR <t> y))
|
||||||
|
(ADDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (ADD x (LGDR <t> y))
|
||||||
|
(SUBload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (SUB x (LGDR <t> y))
|
||||||
|
(ORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (OR x (LGDR <t> y))
|
||||||
|
(ANDload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (AND x (LGDR <t> y))
|
||||||
|
(XORload <t> [off] {sym} x ptr1 (FMOVDstore [off] {sym} ptr2 y _)) && isSamePtr(ptr1, ptr2) -> (XOR x (LGDR <t> y))
|
||||||
|
|
||||||
|
// detect attempts to set/clear the sign bit
|
||||||
|
// may need to be reworked when NIHH/OIHH are added
|
||||||
|
(SRDconst [1] (SLDconst [1] (LGDR <t> x))) -> (LGDR <t> (LPDFR <x.Type> x))
|
||||||
|
(LDGR <t> (SRDconst [1] (SLDconst [1] x))) -> (LPDFR (LDGR <t> x))
|
||||||
|
(OR (MOVDconst [-1<<63]) (LGDR <t> x)) -> (LGDR <t> (LNDFR <x.Type> x))
|
||||||
|
(LDGR <t> (OR (MOVDconst [-1<<63]) x)) -> (LNDFR (LDGR <t> x))
|
||||||
|
|
||||||
|
// detect attempts to set the sign bit with load
|
||||||
|
(LDGR <t> x:(ORload <t1> [off] {sym} (MOVDconst [-1<<63]) ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (LNDFR <t> (LDGR <t> (MOVDload <t1> [off] {sym} ptr mem)))
|
||||||
|
|
||||||
|
// detect copysign
|
||||||
|
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (LGDR (LPDFR <t> y))) -> (LGDR (CPSDR <t> y x))
|
||||||
|
// Copysign with a constant magnitude: only valid when the constant's sign bit is clear.
// The mask must be parenthesized: in Go, & and << share a precedence level and associate
// left, so an unparenthesized c & -1<<63 parses as (c & -1) << 63 and tests bit 0 instead.
(OR (SLDconst [63] (SRDconst [63] (LGDR x))) (MOVDconst [c])) && c & (-1<<63) == 0 -> (LGDR (CPSDR <x.Type> (FMOVDconst <x.Type> [c]) x))
|
||||||
|
// The sign source is a constant whose sign bit is clear: just clear y's sign bit.
// The mask is parenthesized because & and << share a precedence level in Go.
(CPSDR y (FMOVDconst [c])) && c & (-1<<63) == 0 -> (LPDFR y)
|
||||||
|
// The sign source is a constant whose sign bit is set: just set y's sign bit.
// The mask is parenthesized because & and << share a precedence level in Go.
(CPSDR y (FMOVDconst [c])) && c & (-1<<63) != 0 -> (LNDFR y)
|
||||||
|
|
||||||
|
// absorb negations into set/clear sign bit
|
||||||
|
(FNEG (LPDFR x)) -> (LNDFR x)
|
||||||
|
(FNEG (LNDFR x)) -> (LPDFR x)
|
||||||
|
(FNEGS (LPDFR x)) -> (LNDFR x)
|
||||||
|
(FNEGS (LNDFR x)) -> (LPDFR x)
|
||||||
|
|
||||||
|
// no need to convert float32 to float64 to set/clear sign bit
|
||||||
|
(LEDBR (LPDFR (LDEBR x))) -> (LPDFR x)
|
||||||
|
(LEDBR (LNDFR (LDEBR x))) -> (LNDFR x)
|
||||||
|
|
||||||
|
// remove unnecessary FPR <-> GPR moves
|
||||||
|
(LDGR (LGDR x)) -> x
|
||||||
|
(LGDR (LDGR x)) -> (MOVDreg x)
|
||||||
|
|
||||||
// Don't extend before storing
|
// Don't extend before storing
|
||||||
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
|
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
|
||||||
|
|
@ -723,6 +768,20 @@
|
||||||
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
|
(FMOVSstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVSstore [off1+off2] {sym} ptr val mem)
|
||||||
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
|
(FMOVDstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is20Bit(off1+off2) -> (FMOVDstore [off1+off2] {sym} ptr val mem)
|
||||||
|
|
||||||
|
(ADDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDload [off1+off2] {sym} x ptr mem)
|
||||||
|
(ADDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ADDWload [off1+off2] {sym} x ptr mem)
|
||||||
|
(MULLDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLDload [off1+off2] {sym} x ptr mem)
|
||||||
|
(MULLWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (MULLWload [off1+off2] {sym} x ptr mem)
|
||||||
|
(SUBload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBload [off1+off2] {sym} x ptr mem)
|
||||||
|
(SUBWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (SUBWload [off1+off2] {sym} x ptr mem)
|
||||||
|
|
||||||
|
(ANDload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDload [off1+off2] {sym} x ptr mem)
|
||||||
|
(ANDWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ANDWload [off1+off2] {sym} x ptr mem)
|
||||||
|
(ORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORload [off1+off2] {sym} x ptr mem)
|
||||||
|
(ORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (ORWload [off1+off2] {sym} x ptr mem)
|
||||||
|
(XORload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORload [off1+off2] {sym} x ptr mem)
|
||||||
|
(XORWload [off1] {sym} x (ADDconst [off2] ptr) mem) && ptr.Op != OpSB && is20Bit(off1+off2) -> (XORWload [off1+off2] {sym} x ptr mem)
|
||||||
|
|
||||||
// Fold constants into stores.
|
// Fold constants into stores.
|
||||||
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
|
(MOVDstore [off] {sym} ptr (MOVDconst [c]) mem) && is16Bit(c) && isU12Bit(off) && ptr.Op != OpSB ->
|
||||||
(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
|
(MOVDstoreconst [makeValAndOff(c,off)] {sym} ptr mem)
|
||||||
|
|
@ -780,6 +839,20 @@
|
||||||
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
(FMOVDstore [off1] {sym1} (MOVDaddr [off2] {sym2} base) val mem) && is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
|
||||||
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
(FMOVDstore [off1+off2] {mergeSym(sym1,sym2)} base val mem)
|
||||||
|
|
||||||
|
(ADDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(ADDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ADDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(MULLDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(MULLWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (MULLWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(SUBload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(SUBWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (SUBWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
|
||||||
|
(ANDload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(ANDWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ANDWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(ORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(ORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (ORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(XORload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
(XORWload [o1] {s1} x (MOVDaddr [o2] {s2} ptr) mem) && ptr.Op != OpSB && is20Bit(o1+o2) && canMergeSym(s1, s2) -> (XORWload [o1+o2] {mergeSym(s1, s2)} x ptr mem)
|
||||||
|
|
||||||
// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
|
// Cannot store constant to SB directly (no 'move relative long immediate' instructions).
|
||||||
(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
(MOVDstoreconst [sc] {sym1} (MOVDaddr [off] {sym2} ptr) mem) && ptr.Op != OpSB && canMergeSym(sym1, sym2) && ValAndOff(sc).canAdd(off) ->
|
||||||
(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
(MOVDstoreconst [ValAndOff(sc).add(off)] {mergeSym(sym1, sym2)} ptr mem)
|
||||||
|
|
|
||||||
|
|
@ -205,6 +205,9 @@ func init() {
|
||||||
{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0
|
{name: "FMADD", argLength: 3, reg: fp31, asm: "FMADD", resultInArg0: true}, // fp64 arg1 * arg2 + arg0
|
||||||
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
|
{name: "FMSUBS", argLength: 3, reg: fp31, asm: "FMSUBS", resultInArg0: true}, // fp32 arg1 * arg2 - arg0
|
||||||
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
|
{name: "FMSUB", argLength: 3, reg: fp31, asm: "FMSUB", resultInArg0: true}, // fp64 arg1 * arg2 - arg0
|
||||||
|
{name: "LPDFR", argLength: 1, reg: fp11, asm: "LPDFR"}, // fp64/fp32 clear sign bit (load positive)
|
||||||
|
{name: "LNDFR", argLength: 1, reg: fp11, asm: "LNDFR"}, // fp64/fp32 set sign bit (load negative)
|
||||||
|
{name: "CPSDR", argLength: 2, reg: fp21, asm: "CPSDR"}, // fp64/fp32 copy arg1 sign bit to arg0
|
||||||
|
|
||||||
// Round to integer, float64 only.
|
// Round to integer, float64 only.
|
||||||
//
|
//
|
||||||
|
|
@ -357,6 +360,8 @@ func init() {
|
||||||
|
|
||||||
{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
|
{name: "MOVDconst", reg: gp01, asm: "MOVD", typ: "UInt64", aux: "Int64", rematerializeable: true}, // auxint
|
||||||
|
|
||||||
|
{name: "LDGR", argLength: 1, reg: gpfp, asm: "LDGR"}, // move int64 to float64 (no conversion)
|
||||||
|
{name: "LGDR", argLength: 1, reg: fpgp, asm: "LGDR"}, // move float64 to int64 (no conversion)
|
||||||
{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32
|
{name: "CFDBRA", argLength: 1, reg: fpgp, asm: "CFDBRA"}, // convert float64 to int32
|
||||||
{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64
|
{name: "CGDBRA", argLength: 1, reg: fpgp, asm: "CGDBRA"}, // convert float64 to int64
|
||||||
{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32
|
{name: "CFEBRA", argLength: 1, reg: fpgp, asm: "CFEBRA"}, // convert float32 to int32
|
||||||
|
|
|
||||||
|
|
@ -1505,6 +1505,9 @@ const (
|
||||||
OpS390XFMADD
|
OpS390XFMADD
|
||||||
OpS390XFMSUBS
|
OpS390XFMSUBS
|
||||||
OpS390XFMSUB
|
OpS390XFMSUB
|
||||||
|
OpS390XLPDFR
|
||||||
|
OpS390XLNDFR
|
||||||
|
OpS390XCPSDR
|
||||||
OpS390XFIDBR
|
OpS390XFIDBR
|
||||||
OpS390XFMOVSload
|
OpS390XFMOVSload
|
||||||
OpS390XFMOVDload
|
OpS390XFMOVDload
|
||||||
|
|
@ -1610,6 +1613,8 @@ const (
|
||||||
OpS390XMOVDreg
|
OpS390XMOVDreg
|
||||||
OpS390XMOVDnop
|
OpS390XMOVDnop
|
||||||
OpS390XMOVDconst
|
OpS390XMOVDconst
|
||||||
|
OpS390XLDGR
|
||||||
|
OpS390XLGDR
|
||||||
OpS390XCFDBRA
|
OpS390XCFDBRA
|
||||||
OpS390XCGDBRA
|
OpS390XCGDBRA
|
||||||
OpS390XCFEBRA
|
OpS390XCFEBRA
|
||||||
|
|
@ -19385,6 +19390,46 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "LPDFR",
|
||||||
|
argLen: 1,
|
||||||
|
asm: s390x.ALPDFR,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "LNDFR",
|
||||||
|
argLen: 1,
|
||||||
|
asm: s390x.ALNDFR,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "CPSDR",
|
||||||
|
argLen: 2,
|
||||||
|
asm: s390x.ACPSDR,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
{1, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "FIDBR",
|
name: "FIDBR",
|
||||||
auxType: auxInt8,
|
auxType: auxInt8,
|
||||||
|
|
@ -20950,6 +20995,32 @@ var opcodeTable = [...]opInfo{
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "LDGR",
|
||||||
|
argLen: 1,
|
||||||
|
asm: s390x.ALDGR,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "LGDR",
|
||||||
|
argLen: 1,
|
||||||
|
asm: s390x.ALGDR,
|
||||||
|
reg: regInfo{
|
||||||
|
inputs: []inputInfo{
|
||||||
|
{0, 4294901760}, // F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 F13 F14 F15
|
||||||
|
},
|
||||||
|
outputs: []outputInfo{
|
||||||
|
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
name: "CFDBRA",
|
name: "CFDBRA",
|
||||||
argLen: 1,
|
argLen: 1,
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -283,12 +283,15 @@ const (
|
||||||
AFNEGS
|
AFNEGS
|
||||||
ALEDBR
|
ALEDBR
|
||||||
ALDEBR
|
ALDEBR
|
||||||
|
ALPDFR
|
||||||
|
ALNDFR
|
||||||
AFSUB
|
AFSUB
|
||||||
AFSUBS
|
AFSUBS
|
||||||
AFSQRT
|
AFSQRT
|
||||||
AFSQRTS
|
AFSQRTS
|
||||||
AFIEBR
|
AFIEBR
|
||||||
AFIDBR
|
AFIDBR
|
||||||
|
ACPSDR
|
||||||
|
|
||||||
// move from GPR to FPR and vice versa
|
// move from GPR to FPR and vice versa
|
||||||
ALDGR
|
ALDGR
|
||||||
|
|
|
||||||
|
|
@ -81,12 +81,15 @@ var Anames = []string{
|
||||||
"FNEGS",
|
"FNEGS",
|
||||||
"LEDBR",
|
"LEDBR",
|
||||||
"LDEBR",
|
"LDEBR",
|
||||||
|
"LPDFR",
|
||||||
|
"LNDFR",
|
||||||
"FSUB",
|
"FSUB",
|
||||||
"FSUBS",
|
"FSUBS",
|
||||||
"FSQRT",
|
"FSQRT",
|
||||||
"FSQRTS",
|
"FSQRTS",
|
||||||
"FIEBR",
|
"FIEBR",
|
||||||
"FIDBR",
|
"FIDBR",
|
||||||
|
"CPSDR",
|
||||||
"LDGR",
|
"LDGR",
|
||||||
"LGDR",
|
"LGDR",
|
||||||
"CEFBRA",
|
"CEFBRA",
|
||||||
|
|
|
||||||
|
|
@ -212,6 +212,7 @@ var optab = []Optab{
|
||||||
Optab{ACEFBRA, C_REG, C_NONE, C_NONE, C_FREG, 82, 0},
|
Optab{ACEFBRA, C_REG, C_NONE, C_NONE, C_FREG, 82, 0},
|
||||||
Optab{ACFEBRA, C_FREG, C_NONE, C_NONE, C_REG, 83, 0},
|
Optab{ACFEBRA, C_FREG, C_NONE, C_NONE, C_REG, 83, 0},
|
||||||
Optab{AFIEBR, C_SCON, C_FREG, C_NONE, C_FREG, 48, 0},
|
Optab{AFIEBR, C_SCON, C_FREG, C_NONE, C_FREG, 48, 0},
|
||||||
|
Optab{ACPSDR, C_FREG, C_FREG, C_NONE, C_FREG, 49, 0},
|
||||||
|
|
||||||
// load symbol address (plus offset)
|
// load symbol address (plus offset)
|
||||||
Optab{AMOVD, C_SYMADDR, C_NONE, C_NONE, C_REG, 19, 0},
|
Optab{AMOVD, C_SYMADDR, C_NONE, C_NONE, C_REG, 19, 0},
|
||||||
|
|
@ -897,6 +898,8 @@ func buildop(ctxt *obj.Link) {
|
||||||
opset(ABCL, r)
|
opset(ABCL, r)
|
||||||
case AFABS:
|
case AFABS:
|
||||||
opset(AFNABS, r)
|
opset(AFNABS, r)
|
||||||
|
opset(ALPDFR, r)
|
||||||
|
opset(ALNDFR, r)
|
||||||
opset(AFNEG, r)
|
opset(AFNEG, r)
|
||||||
opset(AFNEGS, r)
|
opset(AFNEGS, r)
|
||||||
opset(ALEDBR, r)
|
opset(ALEDBR, r)
|
||||||
|
|
@ -3182,6 +3185,10 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
|
||||||
opcode = op_LPDBR
|
opcode = op_LPDBR
|
||||||
case AFNABS:
|
case AFNABS:
|
||||||
opcode = op_LNDBR
|
opcode = op_LNDBR
|
||||||
|
case ALPDFR:
|
||||||
|
opcode = op_LPDFR
|
||||||
|
case ALNDFR:
|
||||||
|
opcode = op_LNDFR
|
||||||
case AFNEG:
|
case AFNEG:
|
||||||
opcode = op_LCDFR
|
opcode = op_LCDFR
|
||||||
case AFNEGS:
|
case AFNEGS:
|
||||||
|
|
@ -3281,6 +3288,9 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
|
||||||
}
|
}
|
||||||
zRRF(opcode, uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm)
|
zRRF(opcode, uint32(m3), 0, uint32(p.To.Reg), uint32(p.Reg), asm)
|
||||||
|
|
||||||
|
case 49: // copysign
|
||||||
|
zRRF(op_CPSDR, uint32(p.From.Reg), 0, uint32(p.To.Reg), uint32(p.Reg), asm)
|
||||||
|
|
||||||
case 67: // fmov $0 freg
|
case 67: // fmov $0 freg
|
||||||
var opcode uint32
|
var opcode uint32
|
||||||
switch p.As {
|
switch p.As {
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue