2015-02-13 14:40:36 -05:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import "cmd/internal/obj"
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func overlap_cplx(f *Node, t *Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
// check whether f and t could be overlapping stack references.
|
|
|
|
|
// not exact, because it's hard to check for the stack register
|
|
|
|
|
// in portable code. close enough: worst case we will allocate
|
|
|
|
|
// an extra temporary and the registerizer will clean it up.
|
2015-02-17 22:13:49 -05:00
|
|
|
return f.Op == OINDREG && t.Op == OINDREG && f.Xoffset+f.Type.Width >= t.Xoffset && t.Xoffset+t.Type.Width >= f.Xoffset
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
func complexbool(op Op, nl, nr, res *Node, wantTrue bool, likely int, to *obj.Prog) {
|
2015-02-13 14:40:36 -05:00
|
|
|
// make both sides addable in ullman order
|
|
|
|
|
if nr != nil {
|
2015-04-02 19:58:37 -07:00
|
|
|
if nl.Ullman > nr.Ullman && !nl.Addable {
|
2015-04-06 19:36:36 -07:00
|
|
|
nl = CgenTemp(nl)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nr.Addable {
|
2015-04-06 19:36:36 -07:00
|
|
|
nr = CgenTemp(nr)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nl.Addable {
|
2015-04-06 19:36:36 -07:00
|
|
|
nl = CgenTemp(nl)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// Break nl and nr into real and imaginary components.
|
|
|
|
|
var lreal, limag, rreal, rimag Node
|
|
|
|
|
subnode(&lreal, &limag, nl)
|
|
|
|
|
subnode(&rreal, &rimag, nr)
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// build tree
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
// if branching:
|
|
|
|
|
// real(l) == real(r) && imag(l) == imag(r)
|
|
|
|
|
// if generating a value, use a branch-free version:
|
|
|
|
|
// real(l) == real(r) & imag(l) == imag(r)
|
2015-04-06 19:36:36 -07:00
|
|
|
realeq := Node{
|
|
|
|
|
Op: OEQ,
|
|
|
|
|
Left: &lreal,
|
|
|
|
|
Right: &rreal,
|
|
|
|
|
Type: Types[TBOOL],
|
|
|
|
|
}
|
|
|
|
|
imageq := Node{
|
|
|
|
|
Op: OEQ,
|
|
|
|
|
Left: &limag,
|
|
|
|
|
Right: &rimag,
|
|
|
|
|
Type: Types[TBOOL],
|
|
|
|
|
}
|
|
|
|
|
and := Node{
|
|
|
|
|
Op: OANDAND,
|
|
|
|
|
Left: &realeq,
|
|
|
|
|
Right: &imageq,
|
|
|
|
|
Type: Types[TBOOL],
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if res != nil {
|
|
|
|
|
// generating a value
|
|
|
|
|
and.Op = OAND
|
|
|
|
|
if op == ONE {
|
|
|
|
|
and.Op = OOR
|
|
|
|
|
realeq.Op = ONE
|
|
|
|
|
imageq.Op = ONE
|
|
|
|
|
}
|
|
|
|
|
Bvgen(&and, res, true)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// generating a branch
|
2015-02-13 14:40:36 -05:00
|
|
|
if op == ONE {
|
2015-04-06 19:36:36 -07:00
|
|
|
wantTrue = !wantTrue
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
Bgen(&and, wantTrue, likely, to)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// break addable nc-complex into nr-real and ni-imaginary
|
|
|
|
|
func subnode(nr *Node, ni *Node, nc *Node) {
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nc.Addable {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("subnode not addable")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
tc := Simsimtype(nc.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
tc = cplxsubtype(tc)
|
2015-02-23 16:07:24 -05:00
|
|
|
t := Types[tc]
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
if nc.Op == OLITERAL {
|
2016-04-22 12:27:29 -07:00
|
|
|
u := nc.Val().U.(*Mpcplx)
|
|
|
|
|
nodfconst(nr, t, &u.Real)
|
|
|
|
|
nodfconst(ni, t, &u.Imag)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*nr = *nc
|
|
|
|
|
nr.Type = t
|
|
|
|
|
|
|
|
|
|
*ni = *nc
|
|
|
|
|
ni.Type = t
|
|
|
|
|
ni.Xoffset += t.Width
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// generate code res = -nl
|
|
|
|
|
func minus(nl *Node, res *Node) {
|
2015-03-02 14:22:05 -05:00
|
|
|
var ra Node
|
2015-02-13 14:40:36 -05:00
|
|
|
ra.Op = OMINUS
|
|
|
|
|
ra.Left = nl
|
|
|
|
|
ra.Type = nl.Type
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&ra, res)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// build and execute tree
|
|
|
|
|
// real(res) = -real(nl)
|
|
|
|
|
// imag(res) = -imag(nl)
|
|
|
|
|
func complexminus(nl *Node, res *Node) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
var n5 Node
|
|
|
|
|
var n6 Node
|
|
|
|
|
|
|
|
|
|
subnode(&n1, &n2, nl)
|
|
|
|
|
subnode(&n5, &n6, res)
|
|
|
|
|
|
|
|
|
|
minus(&n1, &n5)
|
|
|
|
|
minus(&n2, &n6)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// build and execute tree
|
|
|
|
|
// real(res) = real(nl) op real(nr)
|
|
|
|
|
// imag(res) = imag(nl) op imag(nr)
|
2015-09-24 23:21:18 +02:00
|
|
|
func complexadd(op Op, nl *Node, nr *Node, res *Node) {
|
2015-02-13 14:40:36 -05:00
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
var n3 Node
|
|
|
|
|
var n4 Node
|
|
|
|
|
var n5 Node
|
|
|
|
|
var n6 Node
|
|
|
|
|
|
|
|
|
|
subnode(&n1, &n2, nl)
|
|
|
|
|
subnode(&n3, &n4, nr)
|
|
|
|
|
subnode(&n5, &n6, res)
|
|
|
|
|
|
2015-03-02 14:22:05 -05:00
|
|
|
var ra Node
|
2015-09-24 23:21:18 +02:00
|
|
|
ra.Op = op
|
2015-02-13 14:40:36 -05:00
|
|
|
ra.Left = &n1
|
|
|
|
|
ra.Right = &n3
|
|
|
|
|
ra.Type = n1.Type
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&ra, &n5)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
ra = Node{}
|
2015-09-24 23:21:18 +02:00
|
|
|
ra.Op = op
|
2015-02-13 14:40:36 -05:00
|
|
|
ra.Left = &n2
|
|
|
|
|
ra.Right = &n4
|
|
|
|
|
ra.Type = n2.Type
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&ra, &n6)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// build and execute tree
|
|
|
|
|
// tmp = real(nl)*real(nr) - imag(nl)*imag(nr)
|
|
|
|
|
// imag(res) = real(nl)*imag(nr) + imag(nl)*real(nr)
|
|
|
|
|
// real(res) = tmp
|
|
|
|
|
func complexmul(nl *Node, nr *Node, res *Node) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
var n3 Node
|
|
|
|
|
var n4 Node
|
|
|
|
|
var n5 Node
|
|
|
|
|
var n6 Node
|
|
|
|
|
var tmp Node
|
|
|
|
|
|
|
|
|
|
subnode(&n1, &n2, nl)
|
|
|
|
|
subnode(&n3, &n4, nr)
|
|
|
|
|
subnode(&n5, &n6, res)
|
|
|
|
|
Tempname(&tmp, n5.Type)
|
|
|
|
|
|
|
|
|
|
// real part -> tmp
|
2015-03-02 14:22:05 -05:00
|
|
|
var rm1 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
rm1.Op = OMUL
|
|
|
|
|
rm1.Left = &n1
|
|
|
|
|
rm1.Right = &n3
|
|
|
|
|
rm1.Type = n1.Type
|
|
|
|
|
|
2015-03-02 14:22:05 -05:00
|
|
|
var rm2 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
rm2.Op = OMUL
|
|
|
|
|
rm2.Left = &n2
|
|
|
|
|
rm2.Right = &n4
|
|
|
|
|
rm2.Type = n2.Type
|
|
|
|
|
|
2015-03-02 14:22:05 -05:00
|
|
|
var ra Node
|
2015-02-13 14:40:36 -05:00
|
|
|
ra.Op = OSUB
|
|
|
|
|
ra.Left = &rm1
|
|
|
|
|
ra.Right = &rm2
|
|
|
|
|
ra.Type = rm1.Type
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&ra, &tmp)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// imag part
|
|
|
|
|
rm1 = Node{}
|
|
|
|
|
|
|
|
|
|
rm1.Op = OMUL
|
|
|
|
|
rm1.Left = &n1
|
|
|
|
|
rm1.Right = &n4
|
|
|
|
|
rm1.Type = n1.Type
|
|
|
|
|
|
|
|
|
|
rm2 = Node{}
|
|
|
|
|
rm2.Op = OMUL
|
|
|
|
|
rm2.Left = &n2
|
|
|
|
|
rm2.Right = &n3
|
|
|
|
|
rm2.Type = n2.Type
|
|
|
|
|
|
|
|
|
|
ra = Node{}
|
|
|
|
|
ra.Op = OADD
|
|
|
|
|
ra.Left = &rm1
|
|
|
|
|
ra.Right = &rm2
|
|
|
|
|
ra.Type = rm1.Type
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&ra, &n6)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// tmp ->real part
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&tmp, &n5)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func nodfconst(n *Node, t *Type, fval *Mpflt) {
|
|
|
|
|
*n = Node{}
|
|
|
|
|
n.Op = OLITERAL
|
2015-04-02 19:58:37 -07:00
|
|
|
n.Addable = true
|
2015-02-13 14:40:36 -05:00
|
|
|
ullmancalc(n)
|
2015-05-27 00:47:05 -04:00
|
|
|
n.SetVal(Val{fval})
|
2015-02-13 14:40:36 -05:00
|
|
|
n.Type = t
|
|
|
|
|
|
2016-03-30 15:09:25 -07:00
|
|
|
if !t.IsFloat() {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("nodfconst: bad type %v", t)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func Complexop(n *Node, res *Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n != nil && n.Type != nil {
|
2016-03-30 15:09:25 -07:00
|
|
|
if n.Type.IsComplex() {
|
2015-02-13 14:40:36 -05:00
|
|
|
goto maybe
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if res != nil && res.Type != nil {
|
2016-03-30 15:09:25 -07:00
|
|
|
if res.Type.IsComplex() {
|
2015-02-13 14:40:36 -05:00
|
|
|
goto maybe
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Op == OREAL || n.Op == OIMAG {
|
2015-03-02 12:35:15 -05:00
|
|
|
//dump("\ncomplex-yes", n);
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-03-02 12:35:15 -05:00
|
|
|
//dump("\ncomplex-no", n);
|
|
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
maybe:
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OCONV, // implemented ops
|
|
|
|
|
OADD,
|
|
|
|
|
OSUB,
|
|
|
|
|
OMUL,
|
|
|
|
|
OMINUS,
|
|
|
|
|
OCOMPLEX,
|
|
|
|
|
OREAL,
|
|
|
|
|
OIMAG:
|
2015-03-02 12:35:15 -05:00
|
|
|
//dump("\ncomplex-yes", n);
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ODOT,
|
|
|
|
|
ODOTPTR,
|
|
|
|
|
OINDEX,
|
|
|
|
|
OIND,
|
|
|
|
|
ONAME:
|
2015-03-02 12:35:15 -05:00
|
|
|
//dump("\ncomplex-yes", n);
|
|
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
//dump("\ncomplex-no", n);
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func Complexmove(f *Node, t *Node) {
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("\ncomplexmove-f", f)
|
|
|
|
|
Dump("complexmove-t", t)
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !t.Addable {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("complexmove: to not addable")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
ft := Simsimtype(f.Type)
|
|
|
|
|
tt := Simsimtype(t.Type)
|
2015-09-24 23:21:18 +02:00
|
|
|
// complex to complex move/convert.
|
2015-02-13 14:40:36 -05:00
|
|
|
// make f addable.
|
|
|
|
|
// also use temporary if possible stack overlap.
|
2015-09-24 23:21:18 +02:00
|
|
|
if (ft == TCOMPLEX64 || ft == TCOMPLEX128) && (tt == TCOMPLEX64 || tt == TCOMPLEX128) {
|
2015-04-02 19:58:37 -07:00
|
|
|
if !f.Addable || overlap_cplx(f, t) {
|
2015-02-23 16:07:24 -05:00
|
|
|
var tmp Node
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tmp, f.Type)
|
|
|
|
|
Complexmove(f, &tmp)
|
|
|
|
|
f = &tmp
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
subnode(&n1, &n2, f)
|
2015-02-23 16:07:24 -05:00
|
|
|
var n4 Node
|
|
|
|
|
var n3 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
subnode(&n3, &n4, t)
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&n1, &n3)
|
|
|
|
|
Cgen(&n2, &n4)
|
2015-09-24 23:21:18 +02:00
|
|
|
} else {
|
|
|
|
|
Fatalf("complexmove: unknown conversion: %v -> %v\n", f.Type, t.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func Complexgen(n *Node, res *Node) {
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("\ncomplexgen-n", n)
|
|
|
|
|
Dump("complexgen-res", res)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for n.Op == OCONVNOP {
|
|
|
|
|
n = n.Left
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// pick off float/complex opcodes
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OCOMPLEX:
|
2015-04-02 19:58:37 -07:00
|
|
|
if res.Addable {
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
subnode(&n1, &n2, res)
|
2015-02-23 16:07:24 -05:00
|
|
|
var tmp Node
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tmp, n1.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(n.Left, &tmp)
|
|
|
|
|
Cgen(n.Right, &n2)
|
|
|
|
|
Cgen(&tmp, &n1)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OREAL, OIMAG:
|
2015-02-23 16:07:24 -05:00
|
|
|
nl := n.Left
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nl.Addable {
|
2015-02-23 16:07:24 -05:00
|
|
|
var tmp Node
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tmp, nl.Type)
|
|
|
|
|
Complexgen(nl, &tmp)
|
|
|
|
|
nl = &tmp
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
subnode(&n1, &n2, nl)
|
|
|
|
|
if n.Op == OREAL {
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&n1, res)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(&n2, res)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// perform conversion from n to res
|
2015-02-23 16:07:24 -05:00
|
|
|
tl := Simsimtype(res.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
tl = cplxsubtype(tl)
|
2015-02-23 16:07:24 -05:00
|
|
|
tr := Simsimtype(n.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
tr = cplxsubtype(tr)
|
|
|
|
|
if tl != tr {
|
2015-04-02 19:58:37 -07:00
|
|
|
if !n.Addable {
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Complexmove(n, &n1)
|
|
|
|
|
n = &n1
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Complexmove(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !res.Addable {
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
2015-03-18 17:26:36 -04:00
|
|
|
Igen(res, &n1, nil)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
Regfree(&n1)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if n.Addable {
|
2015-02-13 14:40:36 -05:00
|
|
|
Complexmove(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
|
|
|
|
Dump("complexgen: unknown op", n)
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("complexgen: unknown op %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case ODOT,
|
|
|
|
|
ODOTPTR,
|
|
|
|
|
OINDEX,
|
|
|
|
|
OIND,
|
cmd/compile: fix liveness computation for heap-escaped parameters
The liveness computation of parameters generally was never
correct, but forcing all parameters to be live throughout the
function covered up that problem. The new SSA back end is
too clever: even though it currently keeps the parameter values live
throughout the function, it may find optimizations that mean
the current values are not written back to the original parameter
stack slots immediately or ever (for example if a parameter is set
to nil, SSA constant propagation may replace all later uses of the
parameter with a constant nil, eliminating the need to write the nil
value back to the stack slot), so the liveness code must now
track the actual operations on the stack slots, exposing these
problems.
One small problem in the handling of arguments is that nodarg
can return ONAME PPARAM nodes with adjusted offsets, so that
there are actually multiple *Node pointers for the same parameter
in the instruction stream. This might be possible to correct, but
not in this CL. For now, we fix this by using n.Orig instead of n
when considering PPARAM and PPARAMOUT nodes.
The major problem in the handling of arguments is general
confusion in the liveness code about the meaning of PPARAM|PHEAP
and PPARAMOUT|PHEAP nodes, especially as contrasted with PAUTO|PHEAP.
The difference between these two is that when a local variable "moves"
to the heap, it's really just allocated there to start with; in contrast,
when an argument moves to the heap, the actual data has to be copied
there from the stack at the beginning of the function, and when a
result "moves" to the heap the value in the heap has to be copied
back to the stack when the function returns
This general confusion is also present in the SSA back end.
The PHEAP bit worked decently when I first introduced it 7 years ago (!)
in 391425ae. The back end did nothing sophisticated, and in particular
there was no analysis at all: no escape analysis, no liveness analysis,
and certainly no SSA back end. But the complications caused in the
various downstream consumers suggest that this should be a detail
kept mainly in the front end.
This CL therefore eliminates both the PHEAP bit and even the idea of
"heap variables" from the back ends.
First, it replaces the PPARAM|PHEAP, PPARAMOUT|PHEAP, and PAUTO|PHEAP
variable classes with the single PAUTOHEAP, a pseudo-class indicating
a variable maintained on the heap and available by indirecting a
local variable kept on the stack (a plain PAUTO).
Second, walkexpr replaces all references to PAUTOHEAP variables
with indirections of the corresponding PAUTO variable.
The back ends and the liveness code now just see plain indirected
variables. This may actually produce better code, but the real goal
here is to eliminate these little-used and somewhat suspect code
paths in the back end analyses.
The OPARAM node type goes away too.
A followup CL will do the same to PPARAMREF. I'm not sure that
the back ends (SSA in particular) are handling those right either,
and with the framework established in this CL that change is trivial
and the result clearly more correct.
Fixes #15747.
Change-Id: I2770b1ce3cbc93981bfc7166be66a9da12013d74
Reviewed-on: https://go-review.googlesource.com/23393
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2016-05-25 01:33:24 -04:00
|
|
|
ONAME, // PPARAMREF var
|
2015-02-13 14:40:36 -05:00
|
|
|
OCALLFUNC,
|
|
|
|
|
OCALLMETH,
|
|
|
|
|
OCALLINTER:
|
2015-02-23 16:07:24 -05:00
|
|
|
var n1 Node
|
2015-03-18 17:26:36 -04:00
|
|
|
Igen(n, &n1, res)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
Complexmove(&n1, res)
|
2015-03-18 17:26:36 -04:00
|
|
|
Regfree(&n1)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OCONV,
|
|
|
|
|
OADD,
|
|
|
|
|
OSUB,
|
|
|
|
|
OMUL,
|
|
|
|
|
OMINUS,
|
|
|
|
|
OCOMPLEX,
|
|
|
|
|
OREAL,
|
|
|
|
|
OIMAG:
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
nl := n.Left
|
2015-02-13 14:40:36 -05:00
|
|
|
if nl == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-02-23 16:07:24 -05:00
|
|
|
nr := n.Right
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
// make both sides addable in ullman order
|
2015-02-23 16:07:24 -05:00
|
|
|
var tnl Node
|
2015-02-13 14:40:36 -05:00
|
|
|
if nr != nil {
|
2015-04-02 19:58:37 -07:00
|
|
|
if nl.Ullman > nr.Ullman && !nl.Addable {
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tnl, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(nl, &tnl)
|
2015-02-13 14:40:36 -05:00
|
|
|
nl = &tnl
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nr.Addable {
|
2015-02-23 16:07:24 -05:00
|
|
|
var tnr Node
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tnr, nr.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(nr, &tnr)
|
2015-02-13 14:40:36 -05:00
|
|
|
nr = &tnr
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nl.Addable {
|
2015-02-13 14:40:36 -05:00
|
|
|
Tempname(&tnl, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
Cgen(nl, &tnl)
|
2015-02-13 14:40:36 -05:00
|
|
|
nl = &tnl
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
2016-04-27 15:10:10 +10:00
|
|
|
Fatalf("complexgen: unknown op %v", n.Op)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OCONV:
|
|
|
|
|
Complexmove(nl, res)
|
|
|
|
|
|
|
|
|
|
case OMINUS:
|
|
|
|
|
complexminus(nl, res)
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OADD, OSUB:
|
2015-09-24 23:21:18 +02:00
|
|
|
complexadd(n.Op, nl, nr, res)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case OMUL:
|
|
|
|
|
complexmul(nl, nr, res)
|
|
|
|
|
}
|
|
|
|
|
}
|