// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
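// (A MOVXconst of zero is otherwise a candidate for rewriting into
// XOR reg, reg, which clobbers the flags register; see the
// MOVLconst/MOVQconst case in ssaGenValue.)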
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	if b.Control != nil && b.Control.Type.IsFlags() {
		flive = true
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		} else {
			return x86.AMOVWLZX
		}
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	} else {
		switch t.Size() {
		case 1:
			// Avoids partial register write
			return x86.AMOVL
		case 2:
			return x86.AMOVL
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		case 16:
			return x86.AMOVUPS // int128s are in SSE registers
		default:
			panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
		}
	}
}

// opregreg emits instructions for
// dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ,
// See runtime/mkduff.go.
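// duffStart returns the offset into the duffzero routine to jump to for a
// block of the given size; duffAdj returns the required DI pre-adjustment.
// Both are computed by duff below.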
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}
func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
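//
// duffzero's body consists of dzBlocks blocks, each of which clears
// dzBlockLen*dzClearStep bytes and then advances DI. Jumping further into
// the body skips whole blocks. A remainder is handled by entering the
// preceding block part-way through (skipping some of its MOVs), which
// requires pre-adjusting DI downward so that the MOVs that do execute
// land on the start of the buffer.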
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}

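// ssaGenValue emits the machine code for a single SSA value.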
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}
	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())

	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// The CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		var c *obj.Prog
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			c = s.Prog(x86.ACMPQ)
		case ssa.OpAMD64DIVL:
			c = s.Prog(x86.ACMPL)
		case ssa.OpAMD64DIVW:
			c = s.Prog(x86.ACMPW)
		}
		c.From.Type = obj.TYPE_REG
		c.From.Reg = r
		c.To.Type = obj.TYPE_CONST
		c.To.Offset = -1
		j1 := s.Prog(x86.AJEQ)
		j1.To.Type = obj.TYPE_BRANCH

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		// Skip over -1 fixup code.
		j2 := s.Prog(obj.AJMP)
		j2.To.Type = obj.TYPE_BRANCH

		// Issue -1 fixup code.
		// n / -1 = -n
		n1 := s.Prog(x86.ANEGQ)
		n1.To.Type = obj.TYPE_REG
		n1.To.Reg = x86.REG_AX

		// n % -1 == 0
		n2 := s.Prog(x86.AXORL)
		n2.From.Type = obj.TYPE_REG
		n2.From.Reg = x86.REG_DX
		n2.To.Type = obj.TYPE_REG
		n2.To.Reg = x86.REG_DX

		// TODO(khr): issue only the -1 fixup code we need.
		// For instance, if only the quotient is used, no point in zeroing the remainder.

		j1.To.Val = n1
		j2.To.Val = s.Pc()

	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32 bit integers into
		// HMUL by a constant; SSA rewrites generate the 64 bit versions.

		// Arg[0] is already in AX as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency.
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}

	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow;
		// results: hi in DX, lo in AX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow;
		// results: q in AX, r in DX.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()

	case ssa.OpAMD64AVGQU:
		// Compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			if v.AuxInt == 1 {
				var asm obj.As
				// The software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes a binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			if v.AuxInt == -1 {
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r

	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// TODO: Teach doasm to compile the three-address multiply imul $c, r1, r2
		// so that we don't need to use resultInArg0 for these ops.
		//p.From3 = new(obj.Addr)
		//p.From3.Type = obj.TYPE_REG
		//p.From3.Reg = v.Args[0].Reg()

	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
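	// SBBx r, r computes r - r - carry, i.e. 0 or -1 depending on the
	// carry flag, materializing a mask from an earlier comparison.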
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		p := s.Prog(x86.ALEAQ)
		switch v.Op {
		case ssa.OpAMD64LEAQ1:
			p.From.Scale = 1
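			// SP cannot be used as an index register; swap base and index.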
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64LEAQ2:
			p.From.Scale = 2
		case ssa.OpAMD64LEAQ4:
			p.From.Scale = 4
		case ssa.OpAMD64LEAQ8:
			p.From.Scale = 8
		}
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP;
		// we must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()
		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
		// If flags are live at this instruction, suppress the
		// MOV $0,AX -> XOR AX,AX optimization.
		if v.Aux != nil {
			p.Mark |= x86.PRESERVEFLAGS
		}
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 8
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 4
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.From.Scale = 2
		p.From.Index = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = r
		p.From.Scale = 1
		p.From.Index = i
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 8
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 4
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		p.To.Scale = 2
		p.To.Index = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1:
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		if i == x86.REG_SP {
			r, i = i, r
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Scale = 1
		p.To.Index = i
		gc.AddAux(&p.To, v)
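	// ADDQconstmem adds a constant directly to memory; adding 1 is emitted
	// as INC, mirroring the inc/dec preference for registers above.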
	case ssa.OpAMD64ADDQconstmem, ssa.OpAMD64ADDLconstmem:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmem {
				asm = x86.AINCQ
			} else {
				asm = x86.AINCL
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		} else {
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = val
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
		}
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		r := v.Args[0].Reg()
		i := v.Args[1].Reg()
		switch v.Op {
		case ssa.OpAMD64MOVBstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx1:
			p.To.Scale = 1
			if i == x86.REG_SP {
				r, i = i, r
			}
		case ssa.OpAMD64MOVWstoreconstidx2:
			p.To.Scale = 2
		case ssa.OpAMD64MOVLstoreconstidx4:
			p.To.Scale = 4
		case ssa.OpAMD64MOVQstoreconstidx8:
			p.To.Scale = 8
		}
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = r
		p.To.Index = i
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
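	// Raw moves of the bit pattern between integer and floating-point
	// registers (no conversion).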
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64ADDQmem, ssa.OpAMD64ADDLmem, ssa.OpAMD64SUBQmem, ssa.OpAMD64SUBLmem,
		ssa.OpAMD64ANDQmem, ssa.OpAMD64ANDLmem, ssa.OpAMD64ORQmem, ssa.OpAMD64ORLmem,
		ssa.OpAMD64XORQmem, ssa.OpAMD64XORLmem, ssa.OpAMD64ADDSDmem, ssa.OpAMD64ADDSSmem,
		ssa.OpAMD64SUBSDmem, ssa.OpAMD64SUBSSmem, ssa.OpAMD64MULSDmem, ssa.OpAMD64MULSSmem:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		p.To.Offset = v.AuxInt

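	// MOVQconvert/MOVLconvert change only the type of a value (such as
	// between pointer and integer views of the same bits), so no
	// instruction is emitted; the register allocator must have assigned
	// the input and output to the same register.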
case ssa.OpAMD64MOVQconvert, ssa.OpAMD64MOVLconvert:
|
|
|
|
|
if v.Args[0].Reg() != v.Reg() {
|
|
|
|
|
v.Fatalf("MOVXconvert should be a no-op")
|
|
|
|
|
}
|
|
|
|
|
case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
|
2016-04-21 10:02:36 -07:00
|
|
|
if v.Type.IsMemory() {
|
|
|
|
|
return
|
|
|
|
|
}
|
2016-09-16 09:36:00 -07:00
|
|
|
x := v.Args[0].Reg()
|
|
|
|
|
y := v.Reg()
|
2016-03-12 14:07:40 -08:00
|
|
|
if x != y {
|
2017-03-20 08:01:28 -07:00
|
|
|
opregreg(s, moveByType(v.Type), y, x)
|
2016-03-12 14:07:40 -08:00
|
|
|
}
|
|
|
|
|
case ssa.OpLoadReg:
|
|
|
|
|
if v.Type.IsFlags() {
|
2016-09-14 10:01:05 -07:00
|
|
|
v.Fatalf("load flags not implemented: %v", v.LongString())
|
2016-03-12 14:07:40 -08:00
|
|
|
return
|
|
|
|
|
}
|
2017-03-20 08:01:28 -07:00
|
|
|
p := s.Prog(loadByType(v.Type))
|
2016-10-03 12:26:25 -07:00
|
|
|
gc.AddrAuto(&p.From, v.Args[0])
|
2016-03-12 14:07:40 -08:00
|
|
|
p.To.Type = obj.TYPE_REG
|
2016-09-16 09:36:00 -07:00
|
|
|
p.To.Reg = v.Reg()
|
2016-03-12 14:07:40 -08:00
|
|
|
|
|
|
|
|
case ssa.OpStoreReg:
|
|
|
|
|
if v.Type.IsFlags() {
|
2016-09-14 10:01:05 -07:00
|
|
|
v.Fatalf("store flags not implemented: %v", v.LongString())
|
2016-03-12 14:07:40 -08:00
|
|
|
return
|
|
|
|
|
}
|
2017-03-20 08:01:28 -07:00
|
|
|
p := s.Prog(storeByType(v.Type))
|
2016-03-12 14:07:40 -08:00
|
|
|
p.From.Type = obj.TYPE_REG
|
2016-09-16 09:36:00 -07:00
|
|
|
p.From.Reg = v.Args[0].Reg()
|
2016-10-03 12:26:25 -07:00
|
|
|
gc.AddrAuto(&p.To, v)
|
2016-03-12 14:07:40 -08:00
|
|
|
case ssa.OpAMD64LoweredGetClosurePtr:
|
2016-07-03 13:40:03 -07:00
|
|
|
// Closure pointer is DX.
|
|
|
|
|
gc.CheckLoweredGetClosurePtr(v)
|
2016-03-12 14:07:40 -08:00
|
|
|
case ssa.OpAMD64LoweredGetG:
|
2016-09-16 09:36:00 -07:00
|
|
|
r := v.Reg()
|
2016-03-12 14:07:40 -08:00
|
|
|
// See the comments in cmd/internal/obj/x86/obj6.go
|
|
|
|
|
// near CanUse1InsnTLS for a detailed explanation of these instructions.
|
|
|
|
|
if x86.CanUse1InsnTLS(gc.Ctxt) {
|
|
|
|
|
// MOVQ (TLS), r
|
2017-03-20 08:01:28 -07:00
|
|
|
p := s.Prog(x86.AMOVQ)
|
2016-03-12 14:07:40 -08:00
|
|
|
p.From.Type = obj.TYPE_MEM
|
|
|
|
|
p.From.Reg = x86.REG_TLS
|
|
|
|
|
p.To.Type = obj.TYPE_REG
|
|
|
|
|
p.To.Reg = r
|
|
|
|
|
} else {
|
|
|
|
|
// MOVQ TLS, r
|
|
|
|
|
// MOVQ (r)(TLS*1), r
|
2017-03-20 08:01:28 -07:00
|
|
|
p := s.Prog(x86.AMOVQ)
|
2016-03-12 14:07:40 -08:00
|
|
|
p.From.Type = obj.TYPE_REG
|
|
|
|
|
p.From.Reg = x86.REG_TLS
|
|
|
|
|
p.To.Type = obj.TYPE_REG
|
|
|
|
|
p.To.Reg = r
|
2017-03-20 08:01:28 -07:00
|
|
|
q := s.Prog(x86.AMOVQ)
|
2016-03-12 14:07:40 -08:00
|
|
|
q.From.Type = obj.TYPE_MEM
|
|
|
|
|
q.From.Reg = r
|
|
|
|
|
q.From.Index = x86.REG_TLS
|
|
|
|
|
q.From.Scale = 1
|
|
|
|
|
q.To.Type = obj.TYPE_REG
|
|
|
|
|
q.To.Reg = r
|
|
|
|
|
}
|
2017-03-10 18:34:41 -08:00
|
|
|
case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
|
|
|
|
|
s.Call(v)
|
2016-04-22 13:09:18 -07:00
|
|
|
case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
|
2016-03-11 00:10:52 -05:00
|
|
|
ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
|
2016-04-22 13:09:18 -07:00
|
|
|
ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL:
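		// Bit scans return a tuple: Reg0 holds the bit index, and the flags
		// half reports whether the input was zero (BSF/BSR leave the
		// destination undefined in that case, so callers check ZF).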
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Zero the destination to break the dependency.
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = 0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE:
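		// SETcc materializes a single CPU flag condition as a 0/1 byte in
		// the destination register.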
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()

	case ssa.OpAMD64SETNEF:
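		// Floating-point != must also be true for unordered operands (NaN),
		// which UCOMIS* reports via the parity flag, so OR the SETNE result
		// with SETPS.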
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids a partial-register write and is smaller than ORQ, which the old compiler used.
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64SETEQF:
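		// Floating-point == must be false for unordered operands, so AND
		// the SETEQ result with SETPC (parity clear).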
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids a partial-register write and is smaller than ANDQ, which the old compiler used.
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)

	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
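		// REP STOSQ stores RAX into [RDI], RCX times, for bulk zeroing.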
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
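		// REP MOVSQ copies RCX quadwords from [RSI] to [RDI], for bulk moves.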
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have a false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
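		// On amd64, aligned loads of up to 8 bytes are already atomic, so an
		// atomic load is an ordinary MOV.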
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
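		// XCHG with a memory operand is implicitly locked, so no LOCK prefix
		// is needed for an atomic exchange or store.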
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
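		// LOCK XADD atomically adds the register to memory and leaves the
		// old memory value in the register.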
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
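		// LOCK CMPXCHG compares AX with the memory operand; on a match it
		// stores the new value and sets ZF. SETEQ then materializes the
		// success flag into Reg0.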
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
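		// Atomic byte AND/OR into memory, the lowering of the runtime's
		// atomic And8/Or8.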
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpClobber:
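		// Overwrite a dead stack slot with 0xdeaddead, one 32-bit half at a
		// time, so stale uses fail loudly (clobberdead debug mode).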
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

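// blockJump maps each conditional block kind to its branch instruction and
// the inverted branch used when the true successor is the fallthrough.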
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

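// eqfJumps and nefJumps drive FPJump below. Floating-point equality needs
// two branches because unordered operands (NaN) set the parity flag:
// x == y only when ZF is set and PF is clear; x != y when ZF is clear or
// PF is set. The outer index picks the layout for whichever successor is
// the fallthrough.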
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}
var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
		s.Prog(obj.AUNDEF) // tell plive.go that we never reach here
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.AJMP)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		var p *obj.Prog
		switch next {
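		// Pick the cheapest branch shape: invert the condition when the true
		// successor is the fallthrough, use it directly when the false
		// successor is, and add an unconditional JMP when neither is next.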
		case b.Succs[0].Block():
			p = s.Prog(jmp.invasm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		case b.Succs[1].Block():
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		default:
			p = s.Prog(jmp.asm)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
			q := s.Prog(obj.AJMP)
			q.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: q, B: b.Succs[1].Block()})
		}

	default:
		b.Fatalf("branch not implemented: %s. Control: %s", b.LongString(), b.Control.LongString())
	}
}