go/src/cmd/compile/internal/arm/ggen.go

105 lines
2.9 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package arm
import (
"cmd/compile/internal/gc"
"cmd/internal/obj"
"cmd/internal/obj/arm"
)
func defframe(ptxt *obj.Prog) {
// fill in argument size, stack size
ptxt.To.Type = obj.TYPE_TEXTSIZE
ptxt.To.Val = int32(gc.Rnd(gc.Curfn.Type.ArgWidth(), int64(gc.Widthptr)))
frame := uint32(gc.Rnd(gc.Stksize+gc.Maxarg, int64(gc.Widthreg)))
ptxt.To.Offset = int64(frame)
// insert code to contain ambiguously live variables
// so that garbage collector only sees initialized values
// when it looks for pointers.
p := ptxt
hi := int64(0)
lo := hi
r0 := uint32(0)
for _, n := range gc.Curfn.Func.Dcl {
cmd/compile: pack bool fields in Node, Name, Func and Type structs to bitsets This reduces compiler memory usage by up to 4% - see compilebench results below. name old time/op new time/op delta Template 245ms ± 4% 241ms ± 2% -1.88% (p=0.029 n=10+10) Unicode 126ms ± 3% 124ms ± 3% ~ (p=0.105 n=10+10) GoTypes 805ms ± 2% 813ms ± 3% ~ (p=0.515 n=8+10) Compiler 3.95s ± 2% 3.83s ± 1% -2.96% (p=0.000 n=9+10) MakeBash 47.4s ± 4% 46.6s ± 1% -1.59% (p=0.028 n=9+10) name old user-ns/op new user-ns/op delta Template 324M ± 5% 326M ± 3% ~ (p=0.935 n=10+10) Unicode 186M ± 5% 178M ±10% ~ (p=0.067 n=9+10) GoTypes 1.08G ± 7% 1.09G ± 4% ~ (p=0.956 n=10+10) Compiler 5.34G ± 4% 5.31G ± 1% ~ (p=0.501 n=10+8) name old alloc/op new alloc/op delta Template 41.0MB ± 0% 39.8MB ± 0% -3.03% (p=0.000 n=10+10) Unicode 32.3MB ± 0% 31.0MB ± 0% -4.13% (p=0.000 n=10+10) GoTypes 119MB ± 0% 116MB ± 0% -2.39% (p=0.000 n=10+10) Compiler 499MB ± 0% 487MB ± 0% -2.48% (p=0.000 n=10+10) name old allocs/op new allocs/op delta Template 380k ± 1% 379k ± 1% ~ (p=0.436 n=10+10) Unicode 324k ± 1% 324k ± 0% ~ (p=0.853 n=10+10) GoTypes 1.15M ± 0% 1.15M ± 0% ~ (p=0.481 n=10+10) Compiler 4.41M ± 0% 4.41M ± 0% -0.12% (p=0.007 n=10+10) name old text-bytes new text-bytes delta HelloSize 623k ± 0% 623k ± 0% ~ (all equal) CmdGoSize 6.64M ± 0% 6.64M ± 0% ~ (all equal) name old data-bytes new data-bytes delta HelloSize 5.81k ± 0% 5.81k ± 0% ~ (all equal) CmdGoSize 238k ± 0% 238k ± 0% ~ (all equal) name old bss-bytes new bss-bytes delta HelloSize 134k ± 0% 134k ± 0% ~ (all equal) CmdGoSize 152k ± 0% 152k ± 0% ~ (all equal) name old exe-bytes new exe-bytes delta HelloSize 967k ± 0% 967k ± 0% ~ (all equal) CmdGoSize 10.2M ± 0% 10.2M ± 0% ~ (all equal) Change-Id: I1f40af738254892bd6c8ba2eb43390b175753d52 Reviewed-on: https://go-review.googlesource.com/37445 Reviewed-by: Matthew Dempsky <mdempsky@google.com> Run-TryBot: Matthew Dempsky <mdempsky@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2017-02-27 19:56:38 +02:00
if !n.Name.Needzero() {
continue
}
if n.Class != gc.PAUTO {
gc.Fatalf("needzero class %d", n.Class)
}
if n.Type.Width%int64(gc.Widthptr) != 0 || n.Xoffset%int64(gc.Widthptr) != 0 || n.Type.Width == 0 {
gc.Fatalf("var %L has size %d offset %d", n, int(n.Type.Width), int(n.Xoffset))
}
if lo != hi && n.Xoffset+n.Type.Width >= lo-int64(2*gc.Widthptr) {
// merge with range we already have
lo = gc.Rnd(n.Xoffset, int64(gc.Widthptr))
continue
}
// zero old range
p = zerorange(p, int64(frame), lo, hi, &r0)
// set new range
hi = n.Xoffset + n.Type.Width
lo = n.Xoffset
}
// zero final range
zerorange(p, int64(frame), lo, hi, &r0)
}
func zerorange(p *obj.Prog, frame int64, lo int64, hi int64, r0 *uint32) *obj.Prog {
cnt := hi - lo
if cnt == 0 {
return p
}
if *r0 == 0 {
p = gc.Appendpp(p, arm.AMOVW, obj.TYPE_CONST, 0, 0, obj.TYPE_REG, arm.REG_R0, 0)
*r0 = 1
}
if cnt < int64(4*gc.Widthptr) {
for i := int64(0); i < cnt; i += int64(gc.Widthptr) {
p = gc.Appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REGSP, 4+frame+lo+i)
}
} else if !gc.Nacl && (cnt <= int64(128*gc.Widthptr)) {
p = gc.Appendpp(p, arm.AADD, obj.TYPE_CONST, 0, 4+frame+lo, obj.TYPE_REG, arm.REG_R1, 0)
p.Reg = arm.REGSP
p = gc.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
p.To.Name = obj.NAME_EXTERN
p.To.Sym = gc.Duffzero
p.To.Offset = 4 * (128 - cnt/int64(gc.Widthptr))
} else {
p = gc.Appendpp(p, arm.AADD, obj.TYPE_CONST, 0, 4+frame+lo, obj.TYPE_REG, arm.REG_R1, 0)
p.Reg = arm.REGSP
p = gc.Appendpp(p, arm.AADD, obj.TYPE_CONST, 0, cnt, obj.TYPE_REG, arm.REG_R2, 0)
p.Reg = arm.REG_R1
p = gc.Appendpp(p, arm.AMOVW, obj.TYPE_REG, arm.REG_R0, 0, obj.TYPE_MEM, arm.REG_R1, 4)
p1 := p
p.Scond |= arm.C_PBIT
p = gc.Appendpp(p, arm.ACMP, obj.TYPE_REG, arm.REG_R1, 0, obj.TYPE_NONE, 0, 0)
p.Reg = arm.REG_R2
p = gc.Appendpp(p, arm.ABNE, obj.TYPE_NONE, 0, 0, obj.TYPE_BRANCH, 0, 0)
gc.Patch(p, p1)
}
return p
}
cmd/internal/gc: move cgen, regalloc, et al to portable code This CL moves the bulk of the code that has been copy-and-pasted since the initial 386 port back into a shared place, cutting 5 copies to 1. The motivation here is not cleanup per se but instead to reduce the cost of introducing changes in shared concepts like regalloc or general expression evaluation. For example, a change after this one will implement x.(*T) without a call into the runtime. This CL makes that followup work 5x easier. The single copy still has more special cases for architecture details than I'd like, but having them called out explicitly like this at least opens the door to generalizing the conditions and smoothing out the distinctions in the future. This is a LARGE CL. I started by trying to pull in one function at a time in a sequence of CLs and it became clear that everything was so interrelated that it had to be moved as a whole. Apologies for the size. It is not clear how many more releases this code will matter for; eventually it will be replaced by Keith's SSA work. But as noted above, the deduplication was necessary to reduce the cost of working on the current code while we have it. Passes tests on amd64, 386, arm, and ppc64le. Can build arm64 binaries but not tested there. Being able to build binaries means it is probably very close. Change-Id: I735977f04c0614f80215fb12966dfe9bbd1f5861 Reviewed-on: https://go-review.googlesource.com/7853 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2015-03-18 17:26:36 -04:00
func ginsnop() {
p := gc.Prog(arm.AAND)
p.From.Type = obj.TYPE_REG
p.From.Reg = arm.REG_R0
p.To.Type = obj.TYPE_REG
p.To.Reg = arm.REG_R0
cmd/internal/gc: move cgen, regalloc, et al to portable code This CL moves the bulk of the code that has been copy-and-pasted since the initial 386 port back into a shared place, cutting 5 copies to 1. The motivation here is not cleanup per se but instead to reduce the cost of introducing changes in shared concepts like regalloc or general expression evaluation. For example, a change after this one will implement x.(*T) without a call into the runtime. This CL makes that followup work 5x easier. The single copy still has more special cases for architecture details than I'd like, but having them called out explicitly like this at least opens the door to generalizing the conditions and smoothing out the distinctions in the future. This is a LARGE CL. I started by trying to pull in one function at a time in a sequence of CLs and it became clear that everything was so interrelated that it had to be moved as a whole. Apologies for the size. It is not clear how many more releases this code will matter for; eventually it will be replaced by Keith's SSA work. But as noted above, the deduplication was necessary to reduce the cost of working on the current code while we have it. Passes tests on amd64, 386, arm, and ppc64le. Can build arm64 binaries but not tested there. Being able to build binaries means it is probably very close. Change-Id: I735977f04c0614f80215fb12966dfe9bbd1f5861 Reviewed-on: https://go-review.googlesource.com/7853 Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2015-03-18 17:26:36 -04:00
p.Scond = arm.C_SCOND_EQ
}