// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package amd64

import (
	"fmt"
	"math"

	"cmd/compile/internal/gc"
	"cmd/compile/internal/logopt"
	"cmd/compile/internal/ssa"
	"cmd/compile/internal/types"
	"cmd/internal/obj"
	"cmd/internal/obj/x86"
)

// markMoves marks any MOVXconst ops that need to avoid clobbering flags.
func ssaMarkMoves(s *gc.SSAGenState, b *ssa.Block) {
	flive := b.FlagsLiveAtEnd
	for _, c := range b.ControlValues() {
		flive = c.Type.IsFlags() || flive
	}
	for i := len(b.Values) - 1; i >= 0; i-- {
		v := b.Values[i]
		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
			// The "mark" is any non-nil Aux value.
			v.Aux = v
		}
		if v.Type.IsFlags() {
			flive = false
		}
		for _, a := range v.Args {
			if a.Type.IsFlags() {
				flive = true
			}
		}
	}
}
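
// The mark set above matters because ssaGenValue below rewrites a
// MOV{L,Q}const of $0 whose Aux is nil into "XORL reg, reg", and XOR
// clobbers the flags; marking the move (Aux != nil) forces the
// flag-preserving MOV encoding whenever flags are live across it.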

// loadByType returns the load instruction of the given type.
func loadByType(t *types.Type) obj.As {
	// Avoid partial register write
	if !t.IsFloat() && t.Size() <= 2 {
		if t.Size() == 1 {
			return x86.AMOVBLZX
		}
		return x86.AMOVWLZX
	}
	// Otherwise, there's no difference between load and store opcodes.
	return storeByType(t)
}
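
// Zero-extending into a full 32-bit register here (MOVBLZX/MOVWLZX rather
// than a plain MOVB/MOVW) sidesteps partial-register stalls: writing only
// the low byte or word of a register leaves a false dependency on its old
// upper bits.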

// storeByType returns the store instruction of the given type.
func storeByType(t *types.Type) obj.As {
	width := t.Size()
	if t.IsFloat() {
		switch width {
		case 4:
			return x86.AMOVSS
		case 8:
			return x86.AMOVSD
		}
	} else {
		switch width {
		case 1:
			return x86.AMOVB
		case 2:
			return x86.AMOVW
		case 4:
			return x86.AMOVL
		case 8:
			return x86.AMOVQ
		}
	}
	panic("bad store type")
}

// moveByType returns the reg->reg move instruction of the given type.
func moveByType(t *types.Type) obj.As {
	if t.IsFloat() {
		// Moving the whole sse2 register is faster
		// than moving just the correct low portion of it.
		// There is no xmm->xmm move with 1 byte opcode,
		// so use movups, which has 2 byte opcode.
		return x86.AMOVUPS
	}
	switch t.Size() {
	case 1:
		// Avoids partial register write
		return x86.AMOVL
	case 2:
		return x86.AMOVL
	case 4:
		return x86.AMOVL
	case 8:
		return x86.AMOVQ
	case 16:
		return x86.AMOVUPS // int128s are in SSE registers
	default:
		panic(fmt.Sprintf("bad int register width %d:%s", t.Size(), t))
	}
}

// opregreg emits instructions for
//	dest := dest(To) op src(From)
// and also returns the created obj.Prog so it
// may be further adjusted (offset, scale, etc).
func opregreg(s *gc.SSAGenState, op obj.As, dest, src int16) *obj.Prog {
	p := s.Prog(op)
	p.From.Type = obj.TYPE_REG
	p.To.Type = obj.TYPE_REG
	p.To.Reg = dest
	p.From.Reg = src
	return p
}
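
// A typical call, as in the two-address arithmetic cases of ssaGenValue
// below, where the destination register already holds the first operand:
//
//	opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())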

// memIdx fills out a as an indexed memory reference for v.
// It assumes that the base register and the index register
// are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
// The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
func memIdx(a *obj.Addr, v *ssa.Value) {
	r, i := v.Args[0].Reg(), v.Args[1].Reg()
	a.Type = obj.TYPE_MEM
	a.Scale = v.Op.Scale()
	if a.Scale == 1 && i == x86.REG_SP {
		// SP cannot be used as an index register in x86 addressing
		// modes; with a scale of 1 the base and index are
		// interchangeable, so swap them.
		r, i = i, r
	}
	a.Reg = r
	a.Index = i
}
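
// For example, a MOVQloadidx8 value (scale 8) comes out as the operand
// (base)(index*8): a.Reg = v.Args[0].Reg(), a.Index = v.Args[1].Reg(),
// a.Scale = 8. Any symbol/offset in v.Aux is folded in afterwards by the
// caller via gc.AddAux/gc.AddAux2.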

// DUFFZERO consists of repeated blocks of 4 MOVUPSs + LEAQ;
// see runtime/mkduff.go.
func duffStart(size int64) int64 {
	x, _ := duff(size)
	return x
}

func duffAdj(size int64) int64 {
	_, x := duff(size)
	return x
}

// duff returns the offset (from duffzero, in bytes) and pointer adjust (in bytes)
// required to use the duffzero mechanism for a block of the given size.
func duff(size int64) (int64, int64) {
	if size < 32 || size > 1024 || size%dzClearStep != 0 {
		panic("bad duffzero size")
	}
	steps := size / dzClearStep
	blocks := steps / dzBlockLen
	steps %= dzBlockLen
	off := dzBlockSize * (dzBlocks - blocks)
	var adj int64
	if steps != 0 {
		off -= dzLeaqSize
		off -= dzMovSize * steps
		adj -= dzClearStep * (dzBlockLen - steps)
	}
	return off, adj
}
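
// For example, clearing 128 bytes (with the amd64 parameters noted above:
// 16-byte MOVUPSs, 4 per block) gives steps = 8, blocks = 2, and a zero
// remainder, so duff returns offset dzBlockSize*(dzBlocks-2) and adjust 0:
// the caller jumps far enough into duffzero to run exactly the last two
// full blocks, with no pointer pre-adjustment needed.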

func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
	switch v.Op {
	case ssa.OpAMD64VFMADD231SD:
		p := s.Prog(v.Op.Asm())
		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[1].Reg()})
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
		r := v.Reg()
		r1 := v.Args[0].Reg()
		r2 := v.Args[1].Reg()
		switch {
		case r == r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r == r2:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			// The destination is distinct from both inputs, so a plain
			// two-operand ADD cannot be used. LEA's three-operand form
			// computes r1+r2 directly into r without clobbering either input.
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQ {
				asm = x86.ALEAQ
			} else {
				asm = x86.ALEAL
			}
			p := s.Prog(asm)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = r1
			p.From.Scale = 1
			p.From.Index = r2
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		}

	// 2-address opcode arithmetic
	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
		ssa.OpAMD64PXOR,
		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		opregreg(s, v.Op.Asm(), r, v.Args[1].Reg())
	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()

		// Zero extend dividend.
		c := s.Prog(x86.AXORL)
		c.From.Type = obj.TYPE_REG
		c.From.Reg = x86.REG_DX
		c.To.Type = obj.TYPE_REG
		c.To.Reg = x86.REG_DX

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
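
		// (Unsigned DIV divides the double-width value DX:AX by the
		// operand, so the XORL of DX above is what zero-extends the
		// dividend to double width.)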
	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
		// Arg[0] (the dividend) is in AX.
		// Arg[1] (the divisor) can be in any other register.
		// Result[0] (the quotient) is in AX.
		// Result[1] (the remainder) is in DX.
		r := v.Args[1].Reg()
		var j1 *obj.Prog

		// CPU faults upon signed overflow, which occurs when the most
		// negative int is divided by -1. Handle divide by -1 as a special case.
		if ssa.DivisionNeedsFixUp(v) {
			var c *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				c = s.Prog(x86.ACMPQ)
			case ssa.OpAMD64DIVL:
				c = s.Prog(x86.ACMPL)
			case ssa.OpAMD64DIVW:
				c = s.Prog(x86.ACMPW)
			}
			c.From.Type = obj.TYPE_REG
			c.From.Reg = r
			c.To.Type = obj.TYPE_CONST
			c.To.Offset = -1
			j1 = s.Prog(x86.AJEQ)
			j1.To.Type = obj.TYPE_BRANCH
		}

		// Sign extend dividend.
		switch v.Op {
		case ssa.OpAMD64DIVQ:
			s.Prog(x86.ACQO)
		case ssa.OpAMD64DIVL:
			s.Prog(x86.ACDQ)
		case ssa.OpAMD64DIVW:
			s.Prog(x86.ACWD)
		}

		// Issue divide.
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r

		if j1 != nil {
			// Skip over -1 fixup code.
			j2 := s.Prog(obj.AJMP)
			j2.To.Type = obj.TYPE_BRANCH

			// Issue -1 fixup code.
			// n / -1 = -n
			var n1 *obj.Prog
			switch v.Op {
			case ssa.OpAMD64DIVQ:
				n1 = s.Prog(x86.ANEGQ)
			case ssa.OpAMD64DIVL:
				n1 = s.Prog(x86.ANEGL)
			case ssa.OpAMD64DIVW:
				n1 = s.Prog(x86.ANEGW)
			}
			n1.To.Type = obj.TYPE_REG
			n1.To.Reg = x86.REG_AX

			// n % -1 == 0
			n2 := s.Prog(x86.AXORL)
			n2.From.Type = obj.TYPE_REG
			n2.From.Reg = x86.REG_DX
			n2.To.Type = obj.TYPE_REG
			n2.To.Reg = x86.REG_DX

			// TODO(khr): issue only the -1 fixup code we need.
			// For instance, if only the quotient is used, no point in zeroing the remainder.

			j1.To.Val = n1
			j2.To.Val = s.Pc()
		}
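
		// For a DIVQ that needs the fixup, the emitted sequence is roughly:
		//
		//	CMPQ  divisor, $-1
		//	JEQ   fixup
		//	CQO
		//	IDIVQ divisor
		//	JMP   done
		// fixup:
		//	NEGQ  AX      // quotient: n / -1 == -n
		//	XORL  DX, DX  // remainder: n % -1 == 0
		// done: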
	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
		// The frontend rewrites constant division by 8/16/32-bit integers into
		// HMUL by a constant; SSA rewrites generate the 64-bit versions.

		// Arg[0] is already in AX, as it's the only register we allow,
		// and DX is the only output we care about (the high bits).
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()

		// IMULB puts the high portion in AH instead of DL,
		// so move it to DL for consistency
		if v.Type.Size() == 1 {
			m := s.Prog(x86.AMOVB)
			m.From.Type = obj.TYPE_REG
			m.From.Reg = x86.REG_AH
			m.To.Type = obj.TYPE_REG
			m.To.Reg = x86.REG_DX
		}
	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
		// Arg[0] is already in AX as it's the only register we allow
		// results lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
	case ssa.OpAMD64MULQU2:
		// Arg[0] is already in AX as it's the only register we allow
		// results: hi in DX, lo in AX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
	case ssa.OpAMD64DIVQU2:
		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
		// results: quotient in AX, remainder in DX
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
	case ssa.OpAMD64AVGQU:
		// compute (x+y)/2 unsigned.
		// Do a 64-bit add, the overflow goes into the carry.
		// Shift right once and pull the carry back into the 63rd bit.
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(x86.AADDQ)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		p.From.Reg = v.Args[1].Reg()
		p = s.Prog(x86.ARCRQ)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 1
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
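
		// For example, with x = y = 1<<64 - 1: ADDQ leaves 1<<64 - 2 in r
		// with the carry flag set, and RCRQ $1 rotates that carry back in
		// as the top bit, yielding 1<<64 - 1, the correct unsigned average.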
	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
		r := v.Reg0()
		r0 := v.Args[0].Reg()
		r1 := v.Args[1].Reg()
		switch r {
		case r0:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r1
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		case r1:
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_REG
			p.From.Reg = r0
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		default:
			v.Fatalf("output not in same register as an input %s", v.LongString())
		}
	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
		r := v.Reg()
		a := v.Args[0].Reg()
		if r == a {
			switch v.AuxInt {
			case 1:
				var asm obj.As
				// Software optimization manual recommends add $1,reg.
				// But inc/dec is 1 byte smaller. ICC always uses inc;
				// Clang/GCC choose depending on flags, but prefer add.
				// Experiments show that inc/dec is both a little faster
				// and makes the binary a little smaller.
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.AINCQ
				} else {
					asm = x86.AINCL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case -1:
				var asm obj.As
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ADECQ
				} else {
					asm = x86.ADECL
				}
				p := s.Prog(asm)
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			case 0x80:
				// 'SUBQ $-0x80, r' is shorter to encode than
				// and functionally equivalent to 'ADDQ $0x80, r':
				// x86 sign-extends 8-bit immediates, so -0x80 fits
				// in a one-byte immediate while +0x80 does not.
				asm := x86.ASUBL
				if v.Op == ssa.OpAMD64ADDQconst {
					asm = x86.ASUBQ
				}
				p := s.Prog(asm)
				p.From.Type = obj.TYPE_CONST
				p.From.Offset = -0x80
				p.To.Type = obj.TYPE_REG
				p.To.Reg = r
				return
			}
			p := s.Prog(v.Op.Asm())
			p.From.Type = obj.TYPE_CONST
			p.From.Offset = v.AuxInt
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			return
		}
		var asm obj.As
		if v.Op == ssa.OpAMD64ADDQconst {
			asm = x86.ALEAQ
		} else {
			asm = x86.ALEAL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = a
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		// Flag condition: ^ZERO || PARITY
		// (after a floating-point compare, PARITY is set exactly when the
		// operands were unordered, i.e. one of them was NaN).
		// Generate:
		//   CMOV*NE  SRC,DST
		//   CMOV*PS  SRC,DST
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQNEF {
			q = s.Prog(x86.ACMOVQPS)
		} else if v.Op == ssa.OpAMD64CMOVLNEF {
			q = s.Prog(x86.ACMOVLPS)
		} else {
			q = s.Prog(x86.ACMOVWPS)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = v.Args[1].Reg()
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}

		// Flag condition: ZERO && !PARITY
		// Generate:
		//   MOV      SRC,AX
		//   CMOV*NE  DST,AX
		//   CMOV*PC  AX,DST
		//
		// TODO(rasky): we could generate:
		//   CMOV*NE  DST,SRC
		//   CMOV*PC  SRC,DST
		// But this requires a way for regalloc to know that SRC might be
		// clobbered by this instruction.
		if v.Args[1].Reg() != x86.REG_AX {
			opregreg(s, moveByType(v.Type), x86.REG_AX, v.Args[1].Reg())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		var q *obj.Prog
		if v.Op == ssa.OpAMD64CMOVQEQF {
			q = s.Prog(x86.ACMOVQPC)
		} else if v.Op == ssa.OpAMD64CMOVLEQF {
			q = s.Prog(x86.ACMOVLPC)
		} else {
			q = s.Prog(x86.ACMOVWPC)
		}
		q.From.Type = obj.TYPE_REG
		q.From.Reg = x86.REG_AX
		q.To.Type = obj.TYPE_REG
		q.To.Reg = r
	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
		// SetFrom3 supplies the source operand of the three-operand
		// multiply form: r = v.AuxInt * v.Args[0].
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
		ssa.OpAMD64ANDQconst, ssa.OpAMD64ANDLconst,
		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
		r := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		o := v.Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = o
		if v.AuxInt != 0 && v.Aux == nil {
			// Emit an additional LEA to add the displacement instead of
			// creating a slow three-operand LEA.
			switch v.Op {
			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
				p = s.Prog(x86.ALEAQ)
			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
				p = s.Prog(x86.ALEAL)
			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
				p = s.Prog(x86.ALEAW)
			}
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = o
			p.To.Type = obj.TYPE_REG
			p.To.Reg = o
		}
		gc.AddAux(&p.From, v)
	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
		// The Go assembler has swapped operands for UCOMISx relative to CMP,
		// so we must account for that right here.
		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = v.AuxInt
	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
		ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
		ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
		ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
		op := v.Op
		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
			// Emit 32-bit version because it's shorter
			op = ssa.OpAMD64BTLconst
		}
		p := s.Prog(op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[0].Reg()
	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[1].Reg()
	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
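
		// (AuxValAndOff packs two 32-bit quantities into the op's AuxInt:
		// sc.Val() is the constant to compare against and sc.Off() is the
		// extra addressing offset folded into the memory operand.)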
	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Args[2].Reg()
	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
		sc := v.AuxValAndOff()
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		gc.AddAux2(&p.From, v, sc.Off())
		p.To.Type = obj.TYPE_CONST
		p.To.Offset = sc.Val()
	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
		x := v.Reg()

		// If flags aren't live (indicated by v.Aux == nil),
		// then we can rewrite MOV $0, AX into XOR AX, AX.
		if v.AuxInt == 0 && v.Aux == nil {
			p := s.Prog(x86.AXORL)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x
			break
		}
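		// (XORL reg, reg is the canonical zeroing idiom: it encodes shorter
		// than a MOV of $0, the CPU recognizes it as having no input
		// dependency, and as a 32-bit write it zeroes the upper 32 bits
		// too. It clobbers the flags, though, which is why this rewrite is
		// gated on the v.Aux == nil mark left by ssaMarkMoves.)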

		asm := v.Op.Asm()
		// Use MOVL to move a small constant into a register
		// when the constant is positive and fits into 32 bits.
		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
			// The upper 32 bits are zeroed automatically when using MOVL.
			asm = x86.AMOVL
		}
		p := s.Prog(asm)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = v.AuxInt
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
		x := v.Reg()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_FCONST
		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x
	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload, ssa.OpAMD64MOVOload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2:
		p := s.Prog(v.Op.Asm())
		memIdx(&p.From, v)
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
		ssa.OpAMD64BTCQmodify, ssa.OpAMD64BTCLmodify, ssa.OpAMD64BTRQmodify, ssa.OpAMD64BTRLmodify, ssa.OpAMD64BTSQmodify, ssa.OpAMD64BTSLmodify,
		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		memIdx(&p.To, v)
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		if val == 1 || val == -1 {
			var asm obj.As
			if v.Op == ssa.OpAMD64ADDQconstmodify {
				if val == 1 {
					asm = x86.AINCQ
				} else {
					asm = x86.ADECQ
				}
			} else {
				if val == 1 {
					asm = x86.AINCL
				} else {
					asm = x86.ADECL
				}
			}
			p := s.Prog(asm)
			p.To.Type = obj.TYPE_MEM
			p.To.Reg = v.Args[0].Reg()
			gc.AddAux2(&p.To, v, off)
			break
		}
		fallthrough
|
2018-06-27 02:46:17 +00:00
|
|
|
	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
		ssa.OpAMD64BTCQconstmodify, ssa.OpAMD64BTCLconstmodify, ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTSLconstmodify,
		ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTRLconstmodify, ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify:
		sc := v.AuxValAndOff()
		off := sc.Off()
		val := sc.Val()
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = val
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, off)
	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_CONST
		sc := v.AuxValAndOff()
		p.From.Offset = sc.Val()
		memIdx(&p.To, v)
		gc.AddAux2(&p.To, v, sc.Off())
	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS:
		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
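	// CVTSI2SS/SD only write the low lanes of the destination XMM
	// register, so the CPU treats the old register contents as an
	// input. Zeroing the register first with XORPS breaks that false
	// dependency chain.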
	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
		r := v.Reg()
		// Break false dependency on destination register.
		opregreg(s, x86.AXORPS, r, r)
		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
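	// Bit-preserving moves between integer and floating-point
	// registers, as used by math.Float64bits and friends: a plain
	// MOVQ/MOVL between a GP register and an XMM register.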
	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
		var p *obj.Prog
		switch v.Op {
		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
			p = s.Prog(x86.AMOVQ)
		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
			p = s.Prog(x86.AMOVL)
		}
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
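	// Ops that fold a load into an ALU instruction: compute
	// arg0 OP *(arg1+aux) into the result register. The x86 two-operand
	// form requires the output to share arg0's register, which the
	// check below enforces.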
	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[1].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8:
		p := s.Prog(v.Op.Asm())

		r, i := v.Args[1].Reg(), v.Args[2].Reg()
		p.From.Type = obj.TYPE_MEM
		p.From.Scale = v.Op.Scale()
		if p.From.Scale == 1 && i == x86.REG_SP {
			r, i = i, r
		}
		p.From.Reg = r
		p.From.Index = i

		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		if v.Reg() != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
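	// DUFFZERO zeroes AuxInt bytes at DI by jumping into the middle of
	// the runtime's Duff's-device routine. duffStart picks the entry
	// offset into duffzero, and duffAdj pre-adjusts DI for any leading
	// bytes the chosen entry point does not cover.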
	case ssa.OpAMD64DUFFZERO:
		off := duffStart(v.AuxInt)
		adj := duffAdj(v.AuxInt)
		var p *obj.Prog
		if adj != 0 {
			p = s.Prog(x86.ALEAQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Offset = adj
			p.From.Reg = x86.REG_DI
			p.To.Type = obj.TYPE_REG
			p.To.Reg = x86.REG_DI
		}
		p = s.Prog(obj.ADUFFZERO)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffzero
		p.To.Offset = off
	case ssa.OpAMD64MOVOconst:
		if v.AuxInt != 0 {
			v.Fatalf("MOVOconst can only do constant=0")
		}
		r := v.Reg()
		opregreg(s, x86.AXORPS, r, r)
	case ssa.OpAMD64DUFFCOPY:
		p := s.Prog(obj.ADUFFCOPY)
		p.To.Type = obj.TYPE_ADDR
		p.To.Sym = gc.Duffcopy
		if v.AuxInt%16 != 0 {
			v.Fatalf("bad DUFFCOPY AuxInt %v", v.AuxInt)
		}
		p.To.Offset = 14 * (64 - v.AuxInt/16)
		// 14 and 64 are magic constants. 14 is the number of bytes to encode:
		//	MOVUPS	(SI), X0
		//	ADDQ	$16, SI
		//	MOVUPS	X0, (DI)
		//	ADDQ	$16, DI
		// and 64 is the number of such blocks. See src/runtime/duff_amd64.s:duffcopy.
	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
		if v.Type.IsMemory() {
			return
		}
		x := v.Args[0].Reg()
		y := v.Reg()
		if x != y {
			opregreg(s, moveByType(v.Type), y, x)
		}
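	// OpLoadReg and OpStoreReg move a value between a register and its
	// stack slot; they are the restore and spill operations inserted by
	// the register allocator.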
	case ssa.OpLoadReg:
		if v.Type.IsFlags() {
			v.Fatalf("load flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(loadByType(v.Type))
		gc.AddrAuto(&p.From, v.Args[0])
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpStoreReg:
		if v.Type.IsFlags() {
			v.Fatalf("store flags not implemented: %v", v.LongString())
			return
		}
		p := s.Prog(storeByType(v.Type))
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		gc.AddrAuto(&p.To, v)
	case ssa.OpAMD64LoweredHasCPUFeature:
		p := s.Prog(x86.AMOVBQZX)
		p.From.Type = obj.TYPE_MEM
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LoweredGetClosurePtr:
		// Closure pointer is DX.
		gc.CheckLoweredGetClosurePtr(v)
	case ssa.OpAMD64LoweredGetG:
		r := v.Reg()
		// See the comments in cmd/internal/obj/x86/obj6.go
		// near CanUse1InsnTLS for a detailed explanation of these instructions.
		if x86.CanUse1InsnTLS(gc.Ctxt) {
			// MOVQ (TLS), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_MEM
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
		} else {
			// MOVQ TLS, r
			// MOVQ (r)(TLS*1), r
			p := s.Prog(x86.AMOVQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = x86.REG_TLS
			p.To.Type = obj.TYPE_REG
			p.To.Reg = r
			q := s.Prog(x86.AMOVQ)
			q.From.Type = obj.TYPE_MEM
			q.From.Reg = r
			q.From.Index = x86.REG_TLS
			q.From.Scale = 1
			q.To.Type = obj.TYPE_REG
			q.To.Reg = r
		}
	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
		s.Call(v)
	case ssa.OpAMD64LoweredGetCallerPC:
		p := s.Prog(x86.AMOVQ)
		p.From.Type = obj.TYPE_MEM
		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64LoweredGetCallerSP:
		// caller's SP is the address of the first arg
		mov := x86.AMOVQ
		if gc.Widthptr == 4 {
			mov = x86.AMOVL
		}
		p := s.Prog(mov)
		p.From.Type = obj.TYPE_ADDR
		p.From.Offset = -gc.Ctxt.FixedFrameSize() // 0 on amd64, just to be consistent with other architectures
		p.From.Name = obj.NAME_PARAM
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
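	// Write barriers are emitted as a call to one of the
	// runtime.gcWriteBarrier variants. The variants differ only in
	// which register carries the value to be written, so the symbol is
	// chosen to match wherever the register allocator put arg1.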
	case ssa.OpAMD64LoweredWB:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		// arg0 is in DI. Set sym to match where regalloc put arg1.
		p.To.Sym = gc.GCWriteBarrierReg[v.Args[1].Reg()]
	case ssa.OpAMD64LoweredPanicBoundsA, ssa.OpAMD64LoweredPanicBoundsB, ssa.OpAMD64LoweredPanicBoundsC:
		p := s.Prog(obj.ACALL)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = gc.BoundsCheckFunc[v.AuxInt]
		s.UseArgs(int64(2 * gc.Widthptr)) // space used in callee args area by assembly stubs
	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
		r := v.Reg()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64NEGLflags:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = r
	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		switch v.Op {
		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
			p.To.Reg = v.Reg0()
		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD:
			p.To.Reg = v.Reg()
		}
	case ssa.OpAMD64ROUNDSD:
		p := s.Prog(v.Op.Asm())
		val := v.AuxInt
		// AuxInt encodes the rounding mode: 0 = math.RoundToEven, 1 = Floor, 2 = Ceil, 3 = Trunc.
		if val != 0 && val != 1 && val != 2 && val != 3 {
			v.Fatalf("Invalid rounding mode")
		}
		p.From.Offset = val
		p.From.Type = obj.TYPE_CONST
		p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL:
		if v.Args[0].Reg() != v.Reg() {
			// POPCNT on Intel has a false dependency on the destination register.
			// Xor register with itself to break the dependency.
			p := s.Prog(x86.AXORQ)
			p.From.Type = obj.TYPE_REG
			p.From.Reg = v.Reg()
			p.To.Type = obj.TYPE_REG
			p.To.Reg = v.Reg()
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[0].Reg()
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
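	// SETcc materializes a comparison result as a 0/1 byte in the
	// destination register, based on the flags left by the most recent
	// compare.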
	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
		ssa.OpAMD64SETO:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
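	// Floating-point equality needs the parity flag as well: UCOMISD
	// sets PF when the operands are unordered (either is NaN). So
	// x != y is computed as SETNE OR SETPS, and x == y as SETEQ AND SETPC.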
	case ssa.OpAMD64SETNEF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPS)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ORL avoids a partial register write and is smaller than ORQ; the old compiler used it as well.
		opregreg(s, x86.AORL, v.Reg(), x86.REG_AX)
	case ssa.OpAMD64SETEQF:
		p := s.Prog(v.Op.Asm())
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg()
		q := s.Prog(x86.ASETPC)
		q.To.Type = obj.TYPE_REG
		q.To.Reg = x86.REG_AX
		// ANDL avoids a partial register write and is smaller than ANDQ; the old compiler used it as well.
		opregreg(s, x86.AANDL, v.Reg(), x86.REG_AX)
	case ssa.OpAMD64InvertFlags:
		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
	case ssa.OpAMD64REPSTOSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.ASTOSQ)
	case ssa.OpAMD64REPMOVSQ:
		s.Prog(x86.AREP)
		s.Prog(x86.AMOVSQ)
	case ssa.OpAMD64LoweredNilCheck:
		// Issue a load which will fault if the input is nil.
		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
		// but it doesn't have false dependency on AX.
		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
		// That trades clobbering flags for clobbering a register.
		p := s.Prog(x86.ATESTB)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		if logopt.Enabled() {
			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
		}
		if gc.Debug_checknil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
			gc.Warnl(v.Pos, "generated nil check")
		}
	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_MEM
		p.From.Reg = v.Args[0].Reg()
		gc.AddAux(&p.From, v)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
		r := v.Reg0()
		if r != v.Args[0].Reg() {
			v.Fatalf("input[0] and output[0] not in same register %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = r
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[1].Reg()
		gc.AddAux(&p.To, v)
	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
		if v.Args[1].Reg() != x86.REG_AX {
			v.Fatalf("input[1] not in AX %s", v.LongString())
		}
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[2].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.ASETEQ)
		p.To.Type = obj.TYPE_REG
		p.To.Reg = v.Reg0()
	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ORBlock:
		s.Prog(x86.ALOCK)
		p := s.Prog(v.Op.Asm())
		p.From.Type = obj.TYPE_REG
		p.From.Reg = v.Args[1].Reg()
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = v.Args[0].Reg()
		gc.AddAux(&p.To, v)
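	// OpClobber overwrites a dead stack slot with 0xdeaddead so that
	// stale pointers are easy to spot; it is only emitted by the
	// compiler's clobberdead debugging mode. Two 32-bit stores cover
	// the 8-byte slot.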
	case ssa.OpClobber:
		p := s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p = s.Prog(x86.AMOVL)
		p.From.Type = obj.TYPE_CONST
		p.From.Offset = 0xdeaddead
		p.To.Type = obj.TYPE_MEM
		p.To.Reg = x86.REG_SP
		gc.AddAux(&p.To, v)
		p.To.Offset += 4
	default:
		v.Fatalf("genValue not implemented: %s", v.LongString())
	}
}

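// blockJump maps a conditional block kind to its branch instructions:
// asm branches when the condition holds; invasm is the inverse branch,
// used when the code for the true successor immediately follows.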
var blockJump = [...]struct {
	asm, invasm obj.As
}{
	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
}

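// eqfJumps and nefJumps drive floating-point equality branches, where
// the parity flag must be consulted too: after UCOMISD, equality holds
// only if ZF is set and PF is clear (PF set means an unordered compare,
// i.e. a NaN operand). The outer index says which successor is the
// fallthrough block; Index selects the branch target among b.Succs.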
var eqfJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
}

var nefJumps = [2][2]gc.FloatingEQNEJump{
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
}

func ssaGenBlock(s *gc.SSAGenState, b, next *ssa.Block) {
	switch b.Kind {
	case ssa.BlockPlain:
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockDefer:
		// defer returns in rax:
		// 0 if we should continue executing
		// 1 if we should jump to deferreturn call
		p := s.Prog(x86.ATESTL)
		p.From.Type = obj.TYPE_REG
		p.From.Reg = x86.REG_AX
		p.To.Type = obj.TYPE_REG
		p.To.Reg = x86.REG_AX
		p = s.Prog(x86.AJNE)
		p.To.Type = obj.TYPE_BRANCH
		s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[1].Block()})
		if b.Succs[0].Block() != next {
			p := s.Prog(obj.AJMP)
			p.To.Type = obj.TYPE_BRANCH
			s.Branches = append(s.Branches, gc.Branch{P: p, B: b.Succs[0].Block()})
		}
	case ssa.BlockExit:
	case ssa.BlockRet:
		s.Prog(obj.ARET)
	case ssa.BlockRetJmp:
		p := s.Prog(obj.ARET)
		p.To.Type = obj.TYPE_MEM
		p.To.Name = obj.NAME_EXTERN
		p.To.Sym = b.Aux.(*obj.LSym)

	case ssa.BlockAMD64EQF:
		s.FPJump(b, next, &eqfJumps)

	case ssa.BlockAMD64NEF:
		s.FPJump(b, next, &nefJumps)

	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
		jmp := blockJump[b.Kind]
		switch next {
		case b.Succs[0].Block():
			s.Br(jmp.invasm, b.Succs[1].Block())
		case b.Succs[1].Block():
			s.Br(jmp.asm, b.Succs[0].Block())
		default:
			if b.Likely != ssa.BranchUnlikely {
				s.Br(jmp.asm, b.Succs[0].Block())
				s.Br(obj.AJMP, b.Succs[1].Block())
			} else {
				s.Br(jmp.invasm, b.Succs[1].Block())
				s.Br(obj.AJMP, b.Succs[0].Block())
			}
		}

	default:
		b.Fatalf("branch not implemented: %s", b.LongString())
	}
}