2015-06-06 16:03:33 -07:00
|
|
|
// Copyright 2015 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
2016-04-05 15:11:08 +10:00
|
|
|
// +build ignore
|
|
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
// The gen command generates Go code (in the parent directory) for all
|
|
|
|
|
// the architecture-specific opcodes, blocks, and rewrites.
|
|
|
|
|
package main
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bytes"
|
2015-08-12 15:39:16 -07:00
|
|
|
"flag"
|
2015-06-06 16:03:33 -07:00
|
|
|
"fmt"
|
|
|
|
|
"go/format"
|
|
|
|
|
"io/ioutil"
|
|
|
|
|
"log"
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
"os"
|
2016-03-12 14:07:40 -08:00
|
|
|
"path"
|
2015-07-28 16:04:50 -07:00
|
|
|
"regexp"
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
"runtime"
|
|
|
|
|
"runtime/pprof"
|
2015-08-11 12:51:33 -07:00
|
|
|
"sort"
|
2017-03-09 14:45:37 -08:00
|
|
|
"strings"
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
"sync"
|
2015-06-06 16:03:33 -07:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// arch holds the machine-generated opcode/block/register description for a
// single target architecture, as provided by the per-arch *Ops.go files.
type arch struct {
	name            string      // architecture name, used to prefix generated Op/Block identifiers
	pkg             string      // obj package to import for this arch.
	genfile         string      // source file containing opcode code generation.
	ops             []opData    // opcode descriptions for this arch
	blocks          []blockData // block-kind descriptions for this arch
	regnames        []string    // register names, indexed by register number (bit position in a regMask)
	gpregmask       regMask     // mask of general-purpose registers
	fpregmask       regMask     // mask of floating-point registers
	fp32regmask     regMask     // mask of 32-bit floating-point registers
	fp64regmask     regMask     // mask of 64-bit floating-point registers
	specialregmask  regMask     // mask of special-purpose registers
	framepointerreg int8        // register number of the frame pointer, if any
	linkreg         int8        // register number of the link register, if any
	generic         bool        // true for the architecture-independent ("generic") op set
}
|
|
|
|
|
|
|
|
|
|
// opData describes a single SSA opcode: its name, register constraints,
// assembly mnemonic, and assorted flags consumed by the code generator.
type opData struct {
	name              string  // opcode name (prefixed with the arch name in generated code)
	reg               regInfo // register constraints for inputs/outputs
	asm               string  // assembly mnemonic, if any
	typ               string  // default result type
	aux               string  // aux field type, if any
	rematerializeable bool    // whether this op can be recomputed instead of spilled/restored
	argLength         int32   // number of arguments, if -1, then this operation has a variable number of arguments
	commutative       bool    // this operation is commutative on its first 2 arguments (e.g. addition)
	resultInArg0      bool    // (first, if a tuple) output of v and v.Args[0] must be allocated to the same register
	resultNotInArgs   bool    // outputs must not be allocated to the same registers as inputs
	clobberFlags      bool    // this op clobbers flags register
	call              bool    // is a function call
	nilCheck          bool    // this op is a nil check on arg0
	faultOnNilArg0    bool    // this op will fault if arg0 is nil (and aux encodes a small offset)
	faultOnNilArg1    bool    // this op will fault if arg1 is nil (and aux encodes a small offset)
	usesScratch       bool    // this op requires scratch memory space
	hasSideEffects    bool    // for "reasons", not to be eliminated. E.g., atomic store, #19182.
	zeroWidth         bool    // op never translates into any machine code. example: copy, which may sometimes translate to machine code, is not zero-width.
	symEffect         string  // effect this op has on symbol in aux
	scale             uint8   // amd64/386 indexed load scale
}
|
|
|
|
|
|
|
|
|
|
// blockData describes a single SSA block kind; generated code derives the
// Block* constants and their string names from it.
type blockData struct {
	name string // block kind name (prefixed with the arch name in generated code)
}
|
|
|
|
|
|
|
|
|
|
// regInfo encodes the register constraints of an opcode: which registers
// each input/output may use and which registers the instruction clobbers.
type regInfo struct {
	// inputs[i] encodes the set of registers allowed for the i'th input.
	// Inputs that don't use registers (flags, memory, etc.) should be 0.
	inputs []regMask
	// clobbers encodes the set of registers that are overwritten by
	// the instruction (other than the output registers).
	clobbers regMask
	// outputs[i] encodes the set of registers allowed for the i'th output.
	outputs []regMask
}
|
|
|
|
|
|
|
|
|
|
// regMask is a bitmask of registers; bit i corresponds to the register
// named regnames[i] in the owning arch.
type regMask uint64
|
|
|
|
|
|
2015-06-11 15:52:08 -07:00
|
|
|
func (a arch) regMaskComment(r regMask) string {
|
|
|
|
|
var buf bytes.Buffer
|
|
|
|
|
for i := uint64(0); r != 0; i++ {
|
|
|
|
|
if r&1 != 0 {
|
|
|
|
|
if buf.Len() == 0 {
|
|
|
|
|
buf.WriteString(" //")
|
|
|
|
|
}
|
|
|
|
|
buf.WriteString(" ")
|
|
|
|
|
buf.WriteString(a.regnames[i])
|
|
|
|
|
}
|
|
|
|
|
r >>= 1
|
|
|
|
|
}
|
|
|
|
|
return buf.String()
|
|
|
|
|
}
|
|
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
// archs collects every architecture registered by the per-arch *Ops.go files;
// main sorts it by name before generating code.
var archs []arch
|
|
|
|
|
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
// Profiling flags, useful for seeing where rulegen spends its time.
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
|
|
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
func main() {
|
2015-08-12 15:39:16 -07:00
|
|
|
flag.Parse()
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
if *cpuprofile != "" {
|
|
|
|
|
f, err := os.Create(*cpuprofile)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal("could not create CPU profile: ", err)
|
|
|
|
|
}
|
|
|
|
|
defer f.Close()
|
|
|
|
|
if err := pprof.StartCPUProfile(f); err != nil {
|
|
|
|
|
log.Fatal("could not start CPU profile: ", err)
|
|
|
|
|
}
|
|
|
|
|
defer pprof.StopCPUProfile()
|
|
|
|
|
}
|
2016-06-30 11:13:24 -07:00
|
|
|
sort.Sort(ArchsByName(archs))
|
2019-09-18 16:33:54 +01:00
|
|
|
|
|
|
|
|
// The generate tasks are run concurrently, since they are CPU-intensive
|
|
|
|
|
// that can easily make use of many cores on a machine.
|
|
|
|
|
//
|
|
|
|
|
// Note that there is no limit on the concurrency at the moment. On a
|
|
|
|
|
// four-core laptop at the time of writing, peak RSS usually reaches
|
|
|
|
|
// ~200MiB, which seems doable by practically any machine nowadays. If
|
|
|
|
|
// that stops being the case, we can cap this func to a fixed number of
|
|
|
|
|
// architectures being generated at once.
|
|
|
|
|
|
|
|
|
|
tasks := []func(){
|
|
|
|
|
genOp,
|
|
|
|
|
}
|
|
|
|
|
for _, a := range archs {
|
|
|
|
|
a := a // the funcs are ran concurrently at a later time
|
|
|
|
|
tasks = append(tasks, func() {
|
|
|
|
|
genRules(a)
|
|
|
|
|
genSplitLoadRules(a)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
for _, task := range tasks {
|
|
|
|
|
task := task
|
|
|
|
|
wg.Add(1)
|
|
|
|
|
go func() {
|
|
|
|
|
task()
|
|
|
|
|
wg.Done()
|
|
|
|
|
}()
|
|
|
|
|
}
|
|
|
|
|
wg.Wait()
|
|
|
|
|
|
cmd/compile: teach rulegen to remove unused decls
First, add cpu and memory profiling flags, as these are useful to see
where rulegen is spending its time. It now takes many seconds to run on
a recent laptop, so we have to keep an eye on what it's doing.
Second, stop writing '_ = var' lines to keep imports and variables used
at all times. Now that rulegen removes all such unused names, they're
unnecessary.
To perform the removal, lean on go/types to first detect what names are
unused. We can configure it to give us all the type-checking errors in a
file, so we can collect all "declared but not used" errors in a single
pass.
We then use astutil.Apply to remove the relevant nodes based on the line
information from each unused error. This allows us to apply the changes
without having to do extra parser+printer roundtrips to plaintext, which
are far too expensive.
We need to do multiple such passes, as removing an unused variable
declaration might then make another declaration unused. Two passes are
enough to clean every file at the moment, so add a limit of three passes
for now to avoid eating cpu uncontrollably by accident.
The resulting performance of the changes above is a ~30% loss across the
table, since go/types is fairly expensive. The numbers were obtained
with 'benchcmd Rulegen go run *.go', which involves compiling rulegen
itself, but that seems reflective of how the program is used.
name old time/op new time/op delta
Rulegen 5.61s ± 0% 7.36s ± 0% +31.17% (p=0.016 n=5+4)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 9.92s ± 1% +37.76% (p=0.016 n=5+4)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 169ms ±17% +25.66% (p=0.032 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 85.6MB ± 2% +20.56% (p=0.008 n=5+5)
We can live with a bit more resource usage, but the time/op getting
close to 10s isn't good. To win that back, introduce concurrency in
main.go. This further increases resource usage a bit, but the real time
on this quad-core laptop is greatly reduced. The final benchstat is as
follows:
name old time/op new time/op delta
Rulegen 5.61s ± 0% 3.97s ± 1% -29.26% (p=0.008 n=5+5)
name old user-time/op new user-time/op delta
Rulegen 7.20s ± 1% 13.91s ± 1% +93.09% (p=0.008 n=5+5)
name old sys-time/op new sys-time/op delta
Rulegen 135ms ±19% 269ms ± 9% +99.17% (p=0.008 n=5+5)
name old peak-RSS-bytes new peak-RSS-bytes delta
Rulegen 71.0MB ± 2% 226.3MB ± 1% +218.72% (p=0.008 n=5+5)
It might be possible to reduce the cpu or memory usage in the future,
such as configuring go/types to do less work, or taking shortcuts to
avoid having to run it many times. For now, ~2x cpu and ~4x memory usage
seems like a fair trade for a faster and better rulegen.
Finally, we can remove the old code that tried to remove some unused
variables in a hacky and unmaintainable way.
Change-Id: Iff9e83e3f253babf5a1bd48cc993033b8550cee6
Reviewed-on: https://go-review.googlesource.com/c/go/+/189798
Run-TryBot: Daniel Martí <mvdan@mvdan.cc>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2019-08-10 19:27:45 +02:00
|
|
|
if *memprofile != "" {
|
|
|
|
|
f, err := os.Create(*memprofile)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatal("could not create memory profile: ", err)
|
|
|
|
|
}
|
|
|
|
|
defer f.Close()
|
|
|
|
|
runtime.GC() // get up-to-date statistics
|
|
|
|
|
if err := pprof.WriteHeapProfile(f); err != nil {
|
|
|
|
|
log.Fatal("could not write memory profile: ", err)
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
2015-06-16 11:11:16 -07:00
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
func genOp() {
|
|
|
|
|
w := new(bytes.Buffer)
|
2017-04-03 17:50:34 +00:00
|
|
|
fmt.Fprintf(w, "// Code generated from gen/*Ops.go; DO NOT EDIT.\n")
|
2016-03-01 10:58:06 -08:00
|
|
|
fmt.Fprintln(w)
|
2015-06-06 16:03:33 -07:00
|
|
|
fmt.Fprintln(w, "package ssa")
|
|
|
|
|
|
2016-03-07 18:00:08 -08:00
|
|
|
fmt.Fprintln(w, "import (")
|
|
|
|
|
fmt.Fprintln(w, "\"cmd/internal/obj\"")
|
2016-03-12 14:07:40 -08:00
|
|
|
for _, a := range archs {
|
|
|
|
|
if a.pkg != "" {
|
|
|
|
|
fmt.Fprintf(w, "%q\n", a.pkg)
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-03-07 18:00:08 -08:00
|
|
|
fmt.Fprintln(w, ")")
|
2015-06-16 11:11:16 -07:00
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
// generate Block* declarations
|
|
|
|
|
fmt.Fprintln(w, "const (")
|
2015-08-18 14:39:26 -04:00
|
|
|
fmt.Fprintln(w, "BlockInvalid BlockKind = iota")
|
2015-06-06 16:03:33 -07:00
|
|
|
for _, a := range archs {
|
|
|
|
|
fmt.Fprintln(w)
|
|
|
|
|
for _, d := range a.blocks {
|
|
|
|
|
fmt.Fprintf(w, "Block%s%s\n", a.Name(), d.name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, ")")
|
|
|
|
|
|
|
|
|
|
// generate block kind string method
|
|
|
|
|
fmt.Fprintln(w, "var blockString = [...]string{")
|
2015-08-18 14:39:26 -04:00
|
|
|
fmt.Fprintln(w, "BlockInvalid:\"BlockInvalid\",")
|
2015-06-06 16:03:33 -07:00
|
|
|
for _, a := range archs {
|
|
|
|
|
fmt.Fprintln(w)
|
|
|
|
|
for _, b := range a.blocks {
|
|
|
|
|
fmt.Fprintf(w, "Block%s%s:\"%s\",\n", a.Name(), b.name, b.name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, "}")
|
|
|
|
|
fmt.Fprintln(w, "func (k BlockKind) String() string {return blockString[k]}")
|
|
|
|
|
|
|
|
|
|
// generate Op* declarations
|
|
|
|
|
fmt.Fprintln(w, "const (")
|
2016-09-13 17:01:01 -07:00
|
|
|
fmt.Fprintln(w, "OpInvalid Op = iota") // make sure OpInvalid is 0.
|
2015-06-06 16:03:33 -07:00
|
|
|
for _, a := range archs {
|
|
|
|
|
fmt.Fprintln(w)
|
|
|
|
|
for _, v := range a.ops {
|
2016-09-13 17:01:01 -07:00
|
|
|
if v.name == "Invalid" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2015-06-06 16:03:33 -07:00
|
|
|
fmt.Fprintf(w, "Op%s%s\n", a.Name(), v.name)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, ")")
|
|
|
|
|
|
|
|
|
|
// generate OpInfo table
|
|
|
|
|
fmt.Fprintln(w, "var opcodeTable = [...]opInfo{")
|
|
|
|
|
fmt.Fprintln(w, " { name: \"OpInvalid\" },")
|
|
|
|
|
for _, a := range archs {
|
|
|
|
|
fmt.Fprintln(w)
|
2016-03-12 14:07:40 -08:00
|
|
|
|
|
|
|
|
pkg := path.Base(a.pkg)
|
2015-06-06 16:03:33 -07:00
|
|
|
for _, v := range a.ops {
|
2016-09-13 17:01:01 -07:00
|
|
|
if v.name == "Invalid" {
|
|
|
|
|
continue
|
|
|
|
|
}
|
2015-06-06 16:03:33 -07:00
|
|
|
fmt.Fprintln(w, "{")
|
|
|
|
|
fmt.Fprintf(w, "name:\"%s\",\n", v.name)
|
2016-01-31 11:39:39 -08:00
|
|
|
|
|
|
|
|
// flags
|
|
|
|
|
if v.aux != "" {
|
|
|
|
|
fmt.Fprintf(w, "auxType: aux%s,\n", v.aux)
|
|
|
|
|
}
|
2016-02-27 08:04:48 -06:00
|
|
|
fmt.Fprintf(w, "argLen: %d,\n", v.argLength)
|
|
|
|
|
|
2016-01-31 11:39:39 -08:00
|
|
|
if v.rematerializeable {
|
|
|
|
|
if v.reg.clobbers != 0 {
|
|
|
|
|
log.Fatalf("%s is rematerializeable and clobbers registers", v.name)
|
|
|
|
|
}
|
2017-09-11 21:23:06 +01:00
|
|
|
if v.clobberFlags {
|
|
|
|
|
log.Fatalf("%s is rematerializeable and clobbers flags", v.name)
|
|
|
|
|
}
|
2016-01-31 11:39:39 -08:00
|
|
|
fmt.Fprintln(w, "rematerializeable: true,")
|
|
|
|
|
}
|
2016-02-22 11:19:15 +01:00
|
|
|
if v.commutative {
|
|
|
|
|
fmt.Fprintln(w, "commutative: true,")
|
|
|
|
|
}
|
2016-03-10 13:05:56 -08:00
|
|
|
if v.resultInArg0 {
|
|
|
|
|
fmt.Fprintln(w, "resultInArg0: true,")
|
cmd/compile: don't lower OpConvert
Currently, each architecture lowers OpConvert to an arch-specific
OpXXXconvert. This is silly because OpConvert means the same thing on
all architectures and is logically a no-op that exists only to keep
track of conversions to and from unsafe.Pointer. Furthermore, lowering
it makes it harder to recognize in other analyses, particularly
liveness analysis.
This CL eliminates the lowering of OpConvert, leaving it as the
generic op until code generation time.
The main complexity here is that we still need to register-allocate
OpConvert operations. Currently, each arch's lowered OpConvert
specifies all GP registers in its register mask. Ideally, OpConvert
wouldn't affect value homing at all, and we could just copy the home
of OpConvert's source, but this can potentially home an OpConvert in a
LocalSlot, which neither regalloc nor stackalloc expect. Rather than
try to disentangle this assumption from regalloc and stackalloc, we
continue to register-allocate OpConvert, but teach regalloc that
OpConvert can be allocated to any allocatable GP register.
For #24543.
Change-Id: I795a6aee5fd94d4444a7bafac3838a400c9f7bb6
Reviewed-on: https://go-review.googlesource.com/108496
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2018-04-02 16:08:09 -04:00
|
|
|
// OpConvert's register mask is selected dynamically,
|
|
|
|
|
// so don't try to check it in the static table.
|
|
|
|
|
if v.name != "Convert" && v.reg.inputs[0] != v.reg.outputs[0] {
|
2017-08-23 16:46:00 -07:00
|
|
|
log.Fatalf("%s: input[0] and output[0] must use the same registers for %s", a.name, v.name)
|
2016-04-10 08:26:43 -07:00
|
|
|
}
|
cmd/compile: don't lower OpConvert
Currently, each architecture lowers OpConvert to an arch-specific
OpXXXconvert. This is silly because OpConvert means the same thing on
all architectures and is logically a no-op that exists only to keep
track of conversions to and from unsafe.Pointer. Furthermore, lowering
it makes it harder to recognize in other analyses, particularly
liveness analysis.
This CL eliminates the lowering of OpConvert, leaving it as the
generic op until code generation time.
The main complexity here is that we still need to register-allocate
OpConvert operations. Currently, each arch's lowered OpConvert
specifies all GP registers in its register mask. Ideally, OpConvert
wouldn't affect value homing at all, and we could just copy the home
of OpConvert's source, but this can potentially home an OpConvert in a
LocalSlot, which neither regalloc nor stackalloc expect. Rather than
try to disentangle this assumption from regalloc and stackalloc, we
continue to register-allocate OpConvert, but teach regalloc that
OpConvert can be allocated to any allocatable GP register.
For #24543.
Change-Id: I795a6aee5fd94d4444a7bafac3838a400c9f7bb6
Reviewed-on: https://go-review.googlesource.com/108496
Run-TryBot: Austin Clements <austin@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2018-04-02 16:08:09 -04:00
|
|
|
if v.name != "Convert" && v.commutative && v.reg.inputs[1] != v.reg.outputs[0] {
|
2017-08-23 16:46:00 -07:00
|
|
|
log.Fatalf("%s: input[1] and output[0] must use the same registers for %s", a.name, v.name)
|
2016-04-10 08:26:43 -07:00
|
|
|
}
|
2016-03-10 13:05:56 -08:00
|
|
|
}
|
2016-08-29 16:26:57 -04:00
|
|
|
if v.resultNotInArgs {
|
|
|
|
|
fmt.Fprintln(w, "resultNotInArgs: true,")
|
|
|
|
|
}
|
2016-08-04 06:57:34 -04:00
|
|
|
if v.clobberFlags {
|
|
|
|
|
fmt.Fprintln(w, "clobberFlags: true,")
|
|
|
|
|
}
|
2016-09-09 13:11:07 -07:00
|
|
|
if v.call {
|
|
|
|
|
fmt.Fprintln(w, "call: true,")
|
|
|
|
|
}
|
2016-09-13 17:01:01 -07:00
|
|
|
if v.nilCheck {
|
|
|
|
|
fmt.Fprintln(w, "nilCheck: true,")
|
|
|
|
|
}
|
|
|
|
|
if v.faultOnNilArg0 {
|
|
|
|
|
fmt.Fprintln(w, "faultOnNilArg0: true,")
|
2016-10-18 23:50:42 +02:00
|
|
|
if v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
|
2016-09-13 17:01:01 -07:00
|
|
|
log.Fatalf("faultOnNilArg0 with aux %s not allowed", v.aux)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if v.faultOnNilArg1 {
|
|
|
|
|
fmt.Fprintln(w, "faultOnNilArg1: true,")
|
2016-10-18 23:50:42 +02:00
|
|
|
if v.aux != "SymOff" && v.aux != "SymValAndOff" && v.aux != "Int64" && v.aux != "Int32" && v.aux != "" {
|
2016-09-13 17:01:01 -07:00
|
|
|
log.Fatalf("faultOnNilArg1 with aux %s not allowed", v.aux)
|
|
|
|
|
}
|
|
|
|
|
}
|
2016-10-04 13:00:21 -07:00
|
|
|
if v.usesScratch {
|
|
|
|
|
fmt.Fprintln(w, "usesScratch: true,")
|
|
|
|
|
}
|
2017-02-21 15:22:52 -05:00
|
|
|
if v.hasSideEffects {
|
|
|
|
|
fmt.Fprintln(w, "hasSideEffects: true,")
|
|
|
|
|
}
|
2018-02-28 16:30:07 -05:00
|
|
|
if v.zeroWidth {
|
|
|
|
|
fmt.Fprintln(w, "zeroWidth: true,")
|
|
|
|
|
}
|
2017-03-09 14:46:43 -08:00
|
|
|
needEffect := strings.HasPrefix(v.aux, "Sym")
|
2017-03-09 14:45:37 -08:00
|
|
|
if v.symEffect != "" {
|
2017-03-09 14:46:43 -08:00
|
|
|
if !needEffect {
|
2017-03-09 14:45:37 -08:00
|
|
|
log.Fatalf("symEffect with aux %s not allowed", v.aux)
|
|
|
|
|
}
|
2018-05-08 08:10:17 -07:00
|
|
|
fmt.Fprintf(w, "symEffect: Sym%s,\n", strings.Replace(v.symEffect, ",", "|Sym", -1))
|
2017-03-09 14:46:43 -08:00
|
|
|
} else if needEffect {
|
|
|
|
|
log.Fatalf("symEffect needed for aux %s", v.aux)
|
2017-03-09 14:45:37 -08:00
|
|
|
}
|
2015-07-21 07:10:56 -07:00
|
|
|
if a.name == "generic" {
|
|
|
|
|
fmt.Fprintln(w, "generic:true,")
|
|
|
|
|
fmt.Fprintln(w, "},") // close op
|
|
|
|
|
// generic ops have no reg info or asm
|
|
|
|
|
continue
|
|
|
|
|
}
|
2015-06-16 13:33:32 -07:00
|
|
|
if v.asm != "" {
|
2016-03-12 14:07:40 -08:00
|
|
|
fmt.Fprintf(w, "asm: %s.A%s,\n", pkg, v.asm)
|
2015-06-16 11:11:16 -07:00
|
|
|
}
|
2019-03-09 12:41:34 -08:00
|
|
|
if v.scale != 0 {
|
|
|
|
|
fmt.Fprintf(w, "scale: %d,\n", v.scale)
|
|
|
|
|
}
|
2015-06-06 16:03:33 -07:00
|
|
|
fmt.Fprintln(w, "reg:regInfo{")
|
2015-08-11 12:51:33 -07:00
|
|
|
|
2016-03-01 23:21:55 +00:00
|
|
|
// Compute input allocation order. We allocate from the
|
|
|
|
|
// most to the least constrained input. This order guarantees
|
2015-08-11 12:51:33 -07:00
|
|
|
// that we will always be able to find a register.
|
|
|
|
|
var s []intPair
|
|
|
|
|
for i, r := range v.reg.inputs {
|
|
|
|
|
if r != 0 {
|
|
|
|
|
s = append(s, intPair{countRegs(r), i})
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if len(s) > 0 {
|
|
|
|
|
sort.Sort(byKey(s))
|
|
|
|
|
fmt.Fprintln(w, "inputs: []inputInfo{")
|
|
|
|
|
for _, p := range s {
|
|
|
|
|
r := v.reg.inputs[p.val]
|
|
|
|
|
fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
|
2015-07-21 07:10:56 -07:00
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, "},")
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
2016-07-13 16:15:54 -07:00
|
|
|
|
2015-07-21 07:10:56 -07:00
|
|
|
if v.reg.clobbers > 0 {
|
|
|
|
|
fmt.Fprintf(w, "clobbers: %d,%s\n", v.reg.clobbers, a.regMaskComment(v.reg.clobbers))
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
2016-07-13 16:15:54 -07:00
|
|
|
|
2015-07-21 07:10:56 -07:00
|
|
|
// reg outputs
|
2016-07-13 16:15:54 -07:00
|
|
|
s = s[:0]
|
|
|
|
|
for i, r := range v.reg.outputs {
|
2016-08-04 06:57:34 -04:00
|
|
|
s = append(s, intPair{countRegs(r), i})
|
2016-07-13 16:15:54 -07:00
|
|
|
}
|
|
|
|
|
if len(s) > 0 {
|
|
|
|
|
sort.Sort(byKey(s))
|
|
|
|
|
fmt.Fprintln(w, "outputs: []outputInfo{")
|
|
|
|
|
for _, p := range s {
|
|
|
|
|
r := v.reg.outputs[p.val]
|
|
|
|
|
fmt.Fprintf(w, "{%d,%d},%s\n", p.val, r, a.regMaskComment(r))
|
2015-07-21 07:10:56 -07:00
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, "},")
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
2015-07-21 07:10:56 -07:00
|
|
|
fmt.Fprintln(w, "},") // close reg info
|
|
|
|
|
fmt.Fprintln(w, "},") // close op
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, "}")
|
|
|
|
|
|
2016-03-07 18:00:08 -08:00
|
|
|
fmt.Fprintln(w, "func (o Op) Asm() obj.As {return opcodeTable[o].asm}")
|
2019-03-09 12:41:34 -08:00
|
|
|
fmt.Fprintln(w, "func (o Op) Scale() int16 {return int16(opcodeTable[o].scale)}")
|
2015-06-16 11:11:16 -07:00
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
// generate op string method
|
|
|
|
|
fmt.Fprintln(w, "func (o Op) String() string {return opcodeTable[o].name }")
|
|
|
|
|
|
2016-10-04 13:00:21 -07:00
|
|
|
fmt.Fprintln(w, "func (o Op) UsesScratch() bool { return opcodeTable[o].usesScratch }")
|
|
|
|
|
|
2017-03-09 14:45:37 -08:00
|
|
|
fmt.Fprintln(w, "func (o Op) SymEffect() SymEffect { return opcodeTable[o].symEffect }")
|
|
|
|
|
fmt.Fprintln(w, "func (o Op) IsCall() bool { return opcodeTable[o].call }")
|
|
|
|
|
|
2016-03-21 22:57:26 -07:00
|
|
|
// generate registers
|
|
|
|
|
for _, a := range archs {
|
|
|
|
|
if a.generic {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintf(w, "var registers%s = [...]Register {\n", a.name)
|
2018-02-24 12:13:14 -05:00
|
|
|
var gcRegN int
|
2016-03-21 22:57:26 -07:00
|
|
|
for i, r := range a.regnames {
|
2016-09-16 09:36:00 -07:00
|
|
|
pkg := a.pkg[len("cmd/internal/obj/"):]
|
|
|
|
|
var objname string // name in cmd/internal/obj/$ARCH
|
|
|
|
|
switch r {
|
|
|
|
|
case "SB":
|
|
|
|
|
// SB isn't a real register. cmd/internal/obj expects 0 in this case.
|
|
|
|
|
objname = "0"
|
|
|
|
|
case "SP":
|
|
|
|
|
objname = pkg + ".REGSP"
|
|
|
|
|
case "g":
|
|
|
|
|
objname = pkg + ".REGG"
|
|
|
|
|
default:
|
|
|
|
|
objname = pkg + ".REG_" + r
|
|
|
|
|
}
|
2018-02-24 12:13:14 -05:00
|
|
|
// Assign a GC register map index to registers
|
|
|
|
|
// that may contain pointers.
|
|
|
|
|
gcRegIdx := -1
|
|
|
|
|
if a.gpregmask&(1<<uint(i)) != 0 {
|
|
|
|
|
gcRegIdx = gcRegN
|
|
|
|
|
gcRegN++
|
|
|
|
|
}
|
|
|
|
|
fmt.Fprintf(w, " {%d, %s, %d, \"%s\"},\n", i, objname, gcRegIdx, r)
|
|
|
|
|
}
|
|
|
|
|
if gcRegN > 32 {
|
|
|
|
|
// Won't fit in a uint32 mask.
|
|
|
|
|
log.Fatalf("too many GC registers (%d > 32) on %s", gcRegN, a.name)
|
2016-03-21 22:57:26 -07:00
|
|
|
}
|
|
|
|
|
fmt.Fprintln(w, "}")
|
2016-05-19 12:33:30 -04:00
|
|
|
fmt.Fprintf(w, "var gpRegMask%s = regMask(%d)\n", a.name, a.gpregmask)
|
|
|
|
|
fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
|
2019-09-12 21:05:45 +02:00
|
|
|
if a.fp32regmask != 0 {
|
|
|
|
|
fmt.Fprintf(w, "var fp32RegMask%s = regMask(%d)\n", a.name, a.fp32regmask)
|
|
|
|
|
}
|
|
|
|
|
if a.fp64regmask != 0 {
|
|
|
|
|
fmt.Fprintf(w, "var fp64RegMask%s = regMask(%d)\n", a.name, a.fp64regmask)
|
|
|
|
|
}
|
2016-08-22 12:25:23 -04:00
|
|
|
fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
|
2016-05-19 12:33:30 -04:00
|
|
|
fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
|
2016-10-06 15:06:45 -04:00
|
|
|
fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
|
2016-03-21 22:57:26 -07:00
|
|
|
}
|
|
|
|
|
|
2015-06-06 16:03:33 -07:00
|
|
|
// gofmt result
|
|
|
|
|
b := w.Bytes()
|
|
|
|
|
var err error
|
|
|
|
|
b, err = format.Source(b)
|
|
|
|
|
if err != nil {
|
2016-02-27 08:04:48 -06:00
|
|
|
fmt.Printf("%s\n", w.Bytes())
|
2015-06-06 16:03:33 -07:00
|
|
|
panic(err)
|
|
|
|
|
}
|
|
|
|
|
|
2019-09-18 16:33:54 +01:00
|
|
|
if err := ioutil.WriteFile("../opGen.go", b, 0666); err != nil {
|
2015-06-06 16:03:33 -07:00
|
|
|
log.Fatalf("can't write output: %v\n", err)
|
|
|
|
|
}
|
2015-07-28 16:04:50 -07:00
|
|
|
|
2016-03-12 14:07:40 -08:00
|
|
|
// Check that the arch genfile handles all the arch-specific opcodes.
|
2015-07-28 16:04:50 -07:00
|
|
|
// This is very much a hack, but it is better than nothing.
|
2019-09-17 14:39:54 +01:00
|
|
|
//
|
|
|
|
|
// Do a single regexp pass to record all ops being handled in a map, and
|
|
|
|
|
// then compare that with the ops list. This is much faster than one
|
|
|
|
|
// regexp pass per opcode.
|
2015-07-28 16:04:50 -07:00
|
|
|
for _, a := range archs {
|
2016-03-12 14:07:40 -08:00
|
|
|
if a.genfile == "" {
|
2015-07-28 16:04:50 -07:00
|
|
|
continue
|
|
|
|
|
}
|
2016-03-12 14:07:40 -08:00
|
|
|
|
2019-09-17 14:39:54 +01:00
|
|
|
pattern := fmt.Sprintf(`\Wssa\.Op%s([a-zA-Z0-9_]+)\W`, a.name)
|
|
|
|
|
rxOp, err := regexp.Compile(pattern)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("bad opcode regexp %s: %v", pattern, err)
|
|
|
|
|
}
|
2019-09-13 14:42:05 +01:00
|
|
|
|
2019-09-17 14:39:54 +01:00
|
|
|
src, err := ioutil.ReadFile(a.genfile)
|
|
|
|
|
if err != nil {
|
|
|
|
|
log.Fatalf("can't read %s: %v", a.genfile, err)
|
|
|
|
|
}
|
|
|
|
|
seen := make(map[string]bool, len(a.ops))
|
|
|
|
|
for _, m := range rxOp.FindAllSubmatch(src, -1) {
|
|
|
|
|
seen[string(m[1])] = true
|
|
|
|
|
}
|
|
|
|
|
for _, op := range a.ops {
|
|
|
|
|
if !seen[op.name] {
|
|
|
|
|
log.Fatalf("Op%s%s has no code generation in %s", a.name, op.name, a.genfile)
|
2015-07-28 16:04:50 -07:00
|
|
|
}
|
2019-09-17 14:39:54 +01:00
|
|
|
}
|
2015-07-28 16:04:50 -07:00
|
|
|
}
|
2015-06-06 16:03:33 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Name returns the name of the architecture for use in Op* and Block* enumerations.
|
|
|
|
|
func (a arch) Name() string {
|
|
|
|
|
s := a.name
|
|
|
|
|
if s == "generic" {
|
|
|
|
|
s = ""
|
|
|
|
|
}
|
|
|
|
|
return s
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-11 12:51:33 -07:00
|
|
|
// countRegs returns the number of set bits in the register mask.
|
|
|
|
|
func countRegs(r regMask) int {
|
|
|
|
|
n := 0
|
|
|
|
|
for r != 0 {
|
|
|
|
|
n += int(r & 1)
|
|
|
|
|
r >>= 1
|
|
|
|
|
}
|
|
|
|
|
return n
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// intPair is a (key, val) pair of ints; val typically records an
// index whose position is decided by sorting on key.
type intPair struct {
	key, val int
}

// byKey orders a slice of intPairs by ascending key, implementing
// sort.Interface.
type byKey []intPair

func (s byKey) Len() int { return len(s) }

func (s byKey) Less(i, j int) bool { return s[i].key < s[j].key }

func (s byKey) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
|
2016-06-30 11:13:24 -07:00
|
|
|
|
|
|
|
|
type ArchsByName []arch
|
|
|
|
|
|
|
|
|
|
func (x ArchsByName) Len() int { return len(x) }
|
|
|
|
|
func (x ArchsByName) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
|
|
|
|
|
func (x ArchsByName) Less(i, j int) bool { return x[i].name < x[j].name }
|