2015-02-13 14:40:36 -05:00
|
|
|
// Derived from Inferno utils/6c/txt.c
|
|
|
|
|
// http://code.google.com/p/inferno-os/source/browse/utils/6c/txt.c
|
|
|
|
|
//
|
|
|
|
|
// Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved.
|
|
|
|
|
// Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
|
// Portions Copyright © 1997-1999 Vita Nuova Limited
|
|
|
|
|
// Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
|
|
|
|
|
// Portions Copyright © 2004,2006 Bruce Ellis
|
|
|
|
|
// Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
|
|
|
|
|
// Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
|
|
|
|
|
// Portions Copyright © 2009 The Go Authors. All rights reserved.
|
|
|
|
|
//
|
|
|
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
|
|
|
// of this software and associated documentation files (the "Software"), to deal
|
|
|
|
|
// in the Software without restriction, including without limitation the rights
|
|
|
|
|
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
// copies of the Software, and to permit persons to whom the Software is
|
|
|
|
|
// furnished to do so, subject to the following conditions:
|
|
|
|
|
//
|
|
|
|
|
// The above copyright notice and this permission notice shall be included in
|
|
|
|
|
// all copies or substantial portions of the Software.
|
|
|
|
|
//
|
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
|
|
|
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
|
|
|
// THE SOFTWARE.
|
|
|
|
|
|
2015-05-21 13:28:10 -04:00
|
|
|
package amd64
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
import (
|
2015-05-21 13:28:10 -04:00
|
|
|
"cmd/compile/internal/big"
|
|
|
|
|
"cmd/compile/internal/gc"
|
2015-02-13 14:40:36 -05:00
|
|
|
"cmd/internal/obj"
|
|
|
|
|
"cmd/internal/obj/x86"
|
|
|
|
|
"fmt"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
var resvd = []int{
|
|
|
|
|
x86.REG_DI, // for movstring
|
|
|
|
|
x86.REG_SI, // for movstring
|
|
|
|
|
|
|
|
|
|
x86.REG_AX, // for divide
|
|
|
|
|
x86.REG_CX, // for shift
|
|
|
|
|
x86.REG_DX, // for divide
|
|
|
|
|
x86.REG_SP, // for stack
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* generate
|
|
|
|
|
* as $c, reg
|
|
|
|
|
*/
|
2016-03-07 18:00:08 -08:00
|
|
|
func gconreg(as obj.As, c int64, reg int) {
|
2015-02-13 14:40:36 -05:00
|
|
|
var nr gc.Node
|
|
|
|
|
|
|
|
|
|
switch as {
|
|
|
|
|
case x86.AADDL,
|
|
|
|
|
x86.AMOVL,
|
|
|
|
|
x86.ALEAL:
|
|
|
|
|
gc.Nodreg(&nr, gc.Types[gc.TINT32], reg)
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gc.Nodreg(&nr, gc.Types[gc.TINT64], reg)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
ginscon(as, c, &nr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* generate
|
|
|
|
|
* as $c, n
|
|
|
|
|
*/
|
2016-03-07 18:00:08 -08:00
|
|
|
func ginscon(as obj.As, c int64, n2 *gc.Node) {
|
2015-02-13 14:40:36 -05:00
|
|
|
var n1 gc.Node
|
|
|
|
|
|
|
|
|
|
switch as {
|
|
|
|
|
case x86.AADDL,
|
|
|
|
|
x86.AMOVL,
|
|
|
|
|
x86.ALEAL:
|
|
|
|
|
gc.Nodconst(&n1, gc.Types[gc.TINT32], c)
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
gc.Nodconst(&n1, gc.Types[gc.TINT64], c)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if as != x86.AMOVQ && (c < -(1<<31) || c >= 1<<31) {
|
|
|
|
|
// cannot have 64-bit immediate in ADD, etc.
|
|
|
|
|
// instead, MOV into register first.
|
2015-02-23 16:07:24 -05:00
|
|
|
var ntmp gc.Node
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Regalloc(&ntmp, gc.Types[gc.TINT64], nil)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
gins(x86.AMOVQ, &n1, &ntmp)
|
|
|
|
|
gins(as, &ntmp, n2)
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Regfree(&ntmp)
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
gins(as, &n1, n2)
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
func ginscmp(op gc.Op, t *gc.Type, n1, n2 *gc.Node, likely int) *obj.Prog {
|
2015-05-06 12:28:19 -04:00
|
|
|
if gc.Isint[t.Etype] && n1.Op == gc.OLITERAL && gc.Smallintconst(n1) && n2.Op != gc.OLITERAL {
|
|
|
|
|
// Reverse comparison to place constant last.
|
|
|
|
|
op = gc.Brrev(op)
|
|
|
|
|
n1, n2 = n2, n1
|
|
|
|
|
}
|
|
|
|
|
// General case.
|
|
|
|
|
var r1, r2, g1, g2 gc.Node
|
2015-11-14 15:42:49 -08:00
|
|
|
|
|
|
|
|
// A special case to make write barriers more efficient.
|
|
|
|
|
// Comparing the first field of a named struct can be done directly.
|
|
|
|
|
base := n1
|
cmd/compile: change ODOT and friends to use Sym, not Right
The Node type ODOT and its variants all represent a selector, with a
simple name to the right of the dot. Before this change this was
represented by using an ONAME Node in the Right field. This ONAME node
served no useful purpose. This CL changes these Node types to store the
symbol in the Sym field instead, thus not requiring allocating a Node
for each selector.
When compiling x/tools/go/types this CL eliminates nearly 5000 calls to
newname and reduces the total number of Nodes allocated by about 6.6%.
It seems to cut compilation time by 1 to 2 percent.
Getting this right was somewhat subtle, and I added two dubious changes
to produce the exact same output as before. One is to ishairy in
inl.go: the ONAME node increased the cost of ODOT and friends by 1, and
I retained that, although really ODOT is not more expensive than any
other node. The other is to varexpr in walk.go: because the ONAME in
the Right field of an ODOT has no class, varexpr would always return
false for an ODOT, although in fact for some ODOT's it seemingly ought
to return true; I added an && false for now. I will send separate CLs,
that will break toolstash -cmp, to clean these up.
This CL passes toolstash -cmp.
Change-Id: I4af8a10cc59078c436130ce472f25abc3a9b2f80
Reviewed-on: https://go-review.googlesource.com/20890
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2016-03-18 16:52:30 -07:00
|
|
|
if n1.Op == gc.ODOT && n1.Left.Type.Etype == gc.TSTRUCT && n1.Left.Type.Field(0).Sym == n1.Sym {
|
2015-11-14 15:42:49 -08:00
|
|
|
base = n1.Left
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if base.Op == gc.ONAME && base.Class&gc.PHEAP == 0 || n1.Op == gc.OINDREG {
|
2015-05-06 12:28:19 -04:00
|
|
|
r1 = *n1
|
|
|
|
|
} else {
|
|
|
|
|
gc.Regalloc(&r1, t, n1)
|
|
|
|
|
gc.Regalloc(&g1, n1.Type, &r1)
|
|
|
|
|
gc.Cgen(n1, &g1)
|
|
|
|
|
gmove(&g1, &r1)
|
|
|
|
|
}
|
|
|
|
|
if n2.Op == gc.OLITERAL && gc.Isint[t.Etype] && gc.Smallintconst(n2) {
|
|
|
|
|
r2 = *n2
|
|
|
|
|
} else {
|
|
|
|
|
gc.Regalloc(&r2, t, n2)
|
|
|
|
|
gc.Regalloc(&g2, n1.Type, &r2)
|
|
|
|
|
gc.Cgen(n2, &g2)
|
|
|
|
|
gmove(&g2, &r2)
|
|
|
|
|
}
|
|
|
|
|
gins(optoas(gc.OCMP, t), &r1, &r2)
|
|
|
|
|
if r1.Op == gc.OREGISTER {
|
|
|
|
|
gc.Regfree(&g1)
|
|
|
|
|
gc.Regfree(&r1)
|
|
|
|
|
}
|
|
|
|
|
if r2.Op == gc.OREGISTER {
|
|
|
|
|
gc.Regfree(&g2)
|
|
|
|
|
gc.Regfree(&r2)
|
|
|
|
|
}
|
|
|
|
|
return gc.Gbranch(optoas(op, t), nil, likely)
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-07 18:00:08 -08:00
|
|
|
func ginsboolval(a obj.As, n *gc.Node) {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
gins(jmptoset(a), nil, n)
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-22 20:08:03 -07:00
|
|
|
// set up nodes representing 2^63
|
|
|
|
|
var (
|
|
|
|
|
bigi gc.Node
|
|
|
|
|
bigf gc.Node
|
|
|
|
|
bignodes_did bool
|
|
|
|
|
)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
func bignodes() {
|
2015-04-22 20:08:03 -07:00
|
|
|
if bignodes_did {
|
2015-02-13 14:40:36 -05:00
|
|
|
return
|
|
|
|
|
}
|
2015-04-22 20:08:03 -07:00
|
|
|
bignodes_did = true
|
|
|
|
|
|
|
|
|
|
var i big.Int
|
|
|
|
|
i.SetInt64(1)
|
|
|
|
|
i.Lsh(&i, 63)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-22 20:08:03 -07:00
|
|
|
gc.Nodconst(&bigi, gc.Types[gc.TUINT64], 0)
|
|
|
|
|
bigi.SetBigInt(&i)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-05-07 18:43:03 -07:00
|
|
|
bigi.Convconst(&bigf, gc.Types[gc.TFLOAT64])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
 * generate move:
 *	t = f
 * hard part is conversions.
 *
 * The instruction is chosen from the pair (simplified type of f,
 * simplified type of t). There are three ways out of the big switch:
 *   - fall out of the switch: emit the single instruction a from f to t.
 *   - goto rdst: the instruction needs a register destination, so it is
 *     emitted into a freshly allocated register which is then moved to t.
 *   - goto hard: f is first moved into a register of type cvt and that
 *     register is then moved to t (two recursive gmove calls).
 * The uint64 <-> float cases emit their own sequences and return directly.
 */
func gmove(f *gc.Node, t *gc.Node) {
	// Optional trace of every move, enabled by the 'M' debug flag.
	if gc.Debug['M'] != 0 {
		fmt.Printf("gmove %v -> %v\n", gc.Nconv(f, gc.FmtLong), gc.Nconv(t, gc.FmtLong))
	}

	ft := gc.Simsimtype(f.Type)
	tt := gc.Simsimtype(t.Type)
	// cvt is the type of the intermediate register used by the "hard" path;
	// individual cases override it before jumping there.
	cvt := t.Type

	// Complex values are handled entirely by gc.Complexmove.
	if gc.Iscomplex[ft] || gc.Iscomplex[tt] {
		gc.Complexmove(f, t)
		return
	}

	// cannot have two memory operands
	var a obj.As
	if gc.Ismem(f) && gc.Ismem(t) {
		goto hard
	}

	// convert constant to desired type
	if f.Op == gc.OLITERAL {
		var con gc.Node
		f.Convconst(&con, t.Type)
		f = &con
		ft = tt // so big switch will choose a simple mov

		// some constants can't move directly to memory.
		if gc.Ismem(t) {
			// float constants come from memory.
			if gc.Isfloat[tt] {
				goto hard
			}

			// 64-bit immediates are really 32-bit sign-extended
			// unless moving into a register.
			if gc.Isint[tt] {
				// A value that does not survive the round trip through
				// int32 cannot be encoded as such an immediate.
				if i := con.Int(); int64(int32(i)) != i {
					goto hard
				}
			}
		}
	}

	// value -> value copy, only one memory operand.
	// figure out the instruction to use.
	// break out of switch for one-instruction gins.
	// goto rdst for "destination must be register".
	// goto hard for "convert to cvt type first".
	// otherwise handle and return.

	// The switch key packs the source type into the high 16 bits and the
	// destination type into the low 16 bits.
	switch uint32(ft)<<16 | uint32(tt) {
	default:
		gc.Fatalf("gmove %v -> %v", gc.Tconv(f.Type, gc.FmtLong), gc.Tconv(t.Type, gc.FmtLong))

		/*
		 * integer copy and truncate
		 */
	case gc.TINT8<<16 | gc.TINT8, // same size
		gc.TINT8<<16 | gc.TUINT8,
		gc.TUINT8<<16 | gc.TINT8,
		gc.TUINT8<<16 | gc.TUINT8,
		gc.TINT16<<16 | gc.TINT8,
		// truncate
		gc.TUINT16<<16 | gc.TINT8,
		gc.TINT32<<16 | gc.TINT8,
		gc.TUINT32<<16 | gc.TINT8,
		gc.TINT64<<16 | gc.TINT8,
		gc.TUINT64<<16 | gc.TINT8,
		gc.TINT16<<16 | gc.TUINT8,
		gc.TUINT16<<16 | gc.TUINT8,
		gc.TINT32<<16 | gc.TUINT8,
		gc.TUINT32<<16 | gc.TUINT8,
		gc.TINT64<<16 | gc.TUINT8,
		gc.TUINT64<<16 | gc.TUINT8:
		a = x86.AMOVB

	case gc.TINT16<<16 | gc.TINT16, // same size
		gc.TINT16<<16 | gc.TUINT16,
		gc.TUINT16<<16 | gc.TINT16,
		gc.TUINT16<<16 | gc.TUINT16,
		gc.TINT32<<16 | gc.TINT16,
		// truncate
		gc.TUINT32<<16 | gc.TINT16,
		gc.TINT64<<16 | gc.TINT16,
		gc.TUINT64<<16 | gc.TINT16,
		gc.TINT32<<16 | gc.TUINT16,
		gc.TUINT32<<16 | gc.TUINT16,
		gc.TINT64<<16 | gc.TUINT16,
		gc.TUINT64<<16 | gc.TUINT16:
		a = x86.AMOVW

	case gc.TINT32<<16 | gc.TINT32, // same size
		gc.TINT32<<16 | gc.TUINT32,
		gc.TUINT32<<16 | gc.TINT32,
		gc.TUINT32<<16 | gc.TUINT32:
		a = x86.AMOVL

	case gc.TINT64<<16 | gc.TINT32, // truncate
		gc.TUINT64<<16 | gc.TINT32,
		gc.TINT64<<16 | gc.TUINT32,
		gc.TUINT64<<16 | gc.TUINT32:
		a = x86.AMOVQL

	case gc.TINT64<<16 | gc.TINT64, // same size
		gc.TINT64<<16 | gc.TUINT64,
		gc.TUINT64<<16 | gc.TINT64,
		gc.TUINT64<<16 | gc.TUINT64:
		a = x86.AMOVQ

		/*
		 * integer up-conversions
		 */
	case gc.TINT8<<16 | gc.TINT16, // sign extend int8
		gc.TINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWSX

		goto rdst

	case gc.TINT8<<16 | gc.TINT32,
		gc.TINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLSX
		goto rdst

	case gc.TINT8<<16 | gc.TINT64,
		gc.TINT8<<16 | gc.TUINT64:
		a = x86.AMOVBQSX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT16, // zero extend uint8
		gc.TUINT8<<16 | gc.TUINT16:
		a = x86.AMOVBWZX

		goto rdst

	case gc.TUINT8<<16 | gc.TINT32,
		gc.TUINT8<<16 | gc.TUINT32:
		a = x86.AMOVBLZX
		goto rdst

	case gc.TUINT8<<16 | gc.TINT64,
		gc.TUINT8<<16 | gc.TUINT64:
		a = x86.AMOVBQZX
		goto rdst

	case gc.TINT16<<16 | gc.TINT32, // sign extend int16
		gc.TINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLSX

		goto rdst

	case gc.TINT16<<16 | gc.TINT64,
		gc.TINT16<<16 | gc.TUINT64:
		a = x86.AMOVWQSX
		goto rdst

	case gc.TUINT16<<16 | gc.TINT32, // zero extend uint16
		gc.TUINT16<<16 | gc.TUINT32:
		a = x86.AMOVWLZX

		goto rdst

	case gc.TUINT16<<16 | gc.TINT64,
		gc.TUINT16<<16 | gc.TUINT64:
		a = x86.AMOVWQZX
		goto rdst

	case gc.TINT32<<16 | gc.TINT64, // sign extend int32
		gc.TINT32<<16 | gc.TUINT64:
		a = x86.AMOVLQSX

		goto rdst

		// AMOVL into a register zeros the top of the register,
		// so this is not always necessary, but if we rely on AMOVL
		// the optimizer is almost certain to screw with us.
	case gc.TUINT32<<16 | gc.TINT64, // zero extend uint32
		gc.TUINT32<<16 | gc.TUINT64:
		a = x86.AMOVLQZX

		goto rdst

		/*
		 * float to integer
		 */
	case gc.TFLOAT32<<16 | gc.TINT32:
		a = x86.ACVTTSS2SL

		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT32:
		a = x86.ACVTTSD2SL
		goto rdst

	case gc.TFLOAT32<<16 | gc.TINT64:
		a = x86.ACVTTSS2SQ
		goto rdst

	case gc.TFLOAT64<<16 | gc.TINT64:
		a = x86.ACVTTSD2SQ
		goto rdst

		// convert via int32.
	case gc.TFLOAT32<<16 | gc.TINT16,
		gc.TFLOAT32<<16 | gc.TINT8,
		gc.TFLOAT32<<16 | gc.TUINT16,
		gc.TFLOAT32<<16 | gc.TUINT8,
		gc.TFLOAT64<<16 | gc.TINT16,
		gc.TFLOAT64<<16 | gc.TINT8,
		gc.TFLOAT64<<16 | gc.TUINT16,
		gc.TFLOAT64<<16 | gc.TUINT8:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TFLOAT32<<16 | gc.TUINT32,
		gc.TFLOAT64<<16 | gc.TUINT32:
		cvt = gc.Types[gc.TINT64]

		goto hard

		// algorithm is:
		//	if small enough, use native float64 -> int64 conversion.
		//	otherwise, subtract 2^63, convert, and add it back.
	case gc.TFLOAT32<<16 | gc.TUINT64,
		gc.TFLOAT64<<16 | gc.TUINT64:
		// Note: this a is local to the case and shadows the outer a;
		// the case returns on its own, so the outer a is never needed.
		a := x86.ACVTTSS2SQ

		if ft == gc.TFLOAT64 {
			a = x86.ACVTTSD2SQ
		}
		bignodes() // make sure bigi and bigf (2^63) exist
		// r1 and r3 are registers of the float source type,
		// r2 and r4 of the integer destination type.
		var r1 gc.Node
		gc.Regalloc(&r1, gc.Types[ft], nil)
		var r2 gc.Node
		gc.Regalloc(&r2, gc.Types[tt], t)
		var r3 gc.Node
		gc.Regalloc(&r3, gc.Types[ft], nil)
		var r4 gc.Node
		gc.Regalloc(&r4, gc.Types[tt], nil)
		// Load f into r1 and compare it against 2^63 (bigf).
		gins(optoas(gc.OAS, f.Type), f, &r1)
		gins(optoas(gc.OCMP, f.Type), &bigf, &r1)
		p1 := gc.Gbranch(optoas(gc.OLE, f.Type), nil, +1)
		// Not taken: convert r1 directly into r2.
		gins(a, &r1, &r2)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		// Taken: subtract 2^63 from r1, convert, then XOR 1<<63 (bigi)
		// into the result, which is the "add it back" step described above.
		gc.Patch(p1, gc.Pc)
		gins(optoas(gc.OAS, f.Type), &bigf, &r3)
		gins(optoas(gc.OSUB, f.Type), &r3, &r1)
		gins(a, &r1, &r2)
		gins(x86.AMOVQ, &bigi, &r4)
		gins(x86.AXORQ, &r4, &r2)
		// Both paths join here with the result in r2.
		gc.Patch(p2, gc.Pc)
		gmove(&r2, t)
		gc.Regfree(&r4)
		gc.Regfree(&r3)
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		return

		/*
		 * integer to float
		 */
	case gc.TINT32<<16 | gc.TFLOAT32:
		a = x86.ACVTSL2SS

		goto rdst

	case gc.TINT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSL2SD
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSQ2SS
		goto rdst

	case gc.TINT64<<16 | gc.TFLOAT64:
		a = x86.ACVTSQ2SD
		goto rdst

		// convert via int32
	case gc.TINT16<<16 | gc.TFLOAT32,
		gc.TINT16<<16 | gc.TFLOAT64,
		gc.TINT8<<16 | gc.TFLOAT32,
		gc.TINT8<<16 | gc.TFLOAT64,
		gc.TUINT16<<16 | gc.TFLOAT32,
		gc.TUINT16<<16 | gc.TFLOAT64,
		gc.TUINT8<<16 | gc.TFLOAT32,
		gc.TUINT8<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT32]

		goto hard

		// convert via int64.
	case gc.TUINT32<<16 | gc.TFLOAT32,
		gc.TUINT32<<16 | gc.TFLOAT64:
		cvt = gc.Types[gc.TINT64]

		goto hard

		// algorithm is:
		//	if small enough, use native int64 -> float conversion.
		//	otherwise, halve (rounding to odd?), convert, and double.
	case gc.TUINT64<<16 | gc.TFLOAT32,
		gc.TUINT64<<16 | gc.TFLOAT64:
		// Note: this a is local to the case and shadows the outer a;
		// the case returns on its own, so the outer a is never needed.
		a := x86.ACVTSQ2SS

		if tt == gc.TFLOAT64 {
			a = x86.ACVTSQ2SD
		}
		var zero gc.Node
		gc.Nodconst(&zero, gc.Types[gc.TUINT64], 0)
		var one gc.Node
		gc.Nodconst(&one, gc.Types[gc.TUINT64], 1)
		// r1, r3 and r4 are registers of the integer source type,
		// r2 of the float destination type.
		var r1 gc.Node
		gc.Regalloc(&r1, f.Type, f)
		var r2 gc.Node
		gc.Regalloc(&r2, t.Type, t)
		var r3 gc.Node
		gc.Regalloc(&r3, f.Type, nil)
		var r4 gc.Node
		gc.Regalloc(&r4, f.Type, nil)
		gmove(f, &r1)
		// Signed compare against zero: the JLT branch is taken when r1,
		// viewed as an int64, is negative (i.e. its top bit is set).
		gins(x86.ACMPQ, &r1, &zero)
		p1 := gc.Gbranch(x86.AJLT, nil, +1)
		// Not taken: convert r1 directly into r2.
		gins(a, &r1, &r2)
		p2 := gc.Gbranch(obj.AJMP, nil, 0)
		// Taken: r3 = (r1 >> 1) | (r1 & 1), convert r3 into r2,
		// then add r2 to itself to double it.
		gc.Patch(p1, gc.Pc)
		gmove(&r1, &r3)
		gins(x86.ASHRQ, &one, &r3)
		gmove(&r1, &r4)
		gins(x86.AANDL, &one, &r4)
		gins(x86.AORQ, &r4, &r3)
		gins(a, &r3, &r2)
		gins(optoas(gc.OADD, t.Type), &r2, &r2)
		// Both paths join here with the result in r2.
		gc.Patch(p2, gc.Pc)
		gmove(&r2, t)
		gc.Regfree(&r4)
		gc.Regfree(&r3)
		gc.Regfree(&r2)
		gc.Regfree(&r1)
		return

		/*
		 * float to float
		 */
	case gc.TFLOAT32<<16 | gc.TFLOAT32:
		a = x86.AMOVSS

	case gc.TFLOAT64<<16 | gc.TFLOAT64:
		a = x86.AMOVSD

	case gc.TFLOAT32<<16 | gc.TFLOAT64:
		a = x86.ACVTSS2SD
		goto rdst

	case gc.TFLOAT64<<16 | gc.TFLOAT32:
		a = x86.ACVTSD2SS
		goto rdst
	}

	// Simple case: a single instruction moves f to t.
	gins(a, f, t)
	return

	// requires register destination
rdst:
	// NOTE(review): the extra block presumably keeps this r1 in its own
	// scope so that the earlier "goto hard" statements do not jump over a
	// variable declaration - confirm before removing the braces.
	{
		var r1 gc.Node
		gc.Regalloc(&r1, t.Type, t)

		gins(a, f, &r1)
		gmove(&r1, t)
		gc.Regfree(&r1)
		return
	}

	// requires register intermediate
hard:
	var r1 gc.Node
	gc.Regalloc(&r1, cvt, t)

	gmove(f, &r1)
	gmove(&r1, t)
	gc.Regfree(&r1)
	return
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
func samaddr(f *gc.Node, t *gc.Node) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if f.Op != t.Op {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch f.Op {
|
|
|
|
|
case gc.OREGISTER:
|
2015-04-13 10:28:57 -07:00
|
|
|
if f.Reg != t.Reg {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2015-02-17 22:13:49 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* generate one instruction:
|
|
|
|
|
* as f, t
|
|
|
|
|
*/
|
2016-03-07 18:00:08 -08:00
|
|
|
func gins(as obj.As, f *gc.Node, t *gc.Node) *obj.Prog {
|
2015-02-13 14:40:36 -05:00
|
|
|
// Node nod;
|
|
|
|
|
|
|
|
|
|
// if(f != N && f->op == OINDEX) {
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Regalloc(&nod, ®node, Z);
|
2015-02-13 14:40:36 -05:00
|
|
|
// v = constnode.vconst;
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Cgen(f->right, &nod);
|
2015-02-13 14:40:36 -05:00
|
|
|
// constnode.vconst = v;
|
|
|
|
|
// idx.reg = nod.reg;
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Regfree(&nod);
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
|
|
|
|
// if(t != N && t->op == OINDEX) {
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Regalloc(&nod, ®node, Z);
|
2015-02-13 14:40:36 -05:00
|
|
|
// v = constnode.vconst;
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Cgen(t->right, &nod);
|
2015-02-13 14:40:36 -05:00
|
|
|
// constnode.vconst = v;
|
|
|
|
|
// idx.reg = nod.reg;
|
2015-03-18 17:26:36 -04:00
|
|
|
// gc.Regfree(&nod);
|
2015-02-13 14:40:36 -05:00
|
|
|
// }
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
if f != nil && f.Op == gc.OADDR && (as == x86.AMOVL || as == x86.AMOVQ) {
|
|
|
|
|
// Turn MOVL $xxx into LEAL xxx.
|
|
|
|
|
// These should be equivalent but most of the backend
|
|
|
|
|
// only expects to see LEAL, because that's what we had
|
|
|
|
|
// historically generated. Various hidden assumptions are baked in by now.
|
|
|
|
|
if as == x86.AMOVL {
|
|
|
|
|
as = x86.ALEAL
|
|
|
|
|
} else {
|
|
|
|
|
as = x86.ALEAQ
|
|
|
|
|
}
|
|
|
|
|
f = f.Left
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
switch as {
|
|
|
|
|
case x86.AMOVB,
|
|
|
|
|
x86.AMOVW,
|
|
|
|
|
x86.AMOVL,
|
|
|
|
|
x86.AMOVQ,
|
|
|
|
|
x86.AMOVSS,
|
|
|
|
|
x86.AMOVSD:
|
2015-02-17 22:13:49 -05:00
|
|
|
if f != nil && t != nil && samaddr(f, t) {
|
2015-02-13 14:40:36 -05:00
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case x86.ALEAQ:
|
2015-02-17 22:13:49 -05:00
|
|
|
if f != nil && gc.Isconst(f, gc.CTNIL) {
|
2015-08-30 23:10:03 +02:00
|
|
|
gc.Fatalf("gins LEAQ nil %v", f.Type)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
p := gc.Prog(as)
|
2015-03-16 15:27:19 -04:00
|
|
|
gc.Naddr(&p.From, f)
|
|
|
|
|
gc.Naddr(&p.To, t)
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
if gc.Debug['g'] != 0 {
|
|
|
|
|
fmt.Printf("%v\n", p)
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
w := int32(0)
|
2015-02-13 14:40:36 -05:00
|
|
|
switch as {
|
|
|
|
|
case x86.AMOVB:
|
|
|
|
|
w = 1
|
|
|
|
|
|
|
|
|
|
case x86.AMOVW:
|
|
|
|
|
w = 2
|
|
|
|
|
|
|
|
|
|
case x86.AMOVL:
|
|
|
|
|
w = 4
|
|
|
|
|
|
|
|
|
|
case x86.AMOVQ:
|
|
|
|
|
w = 8
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-16 15:27:19 -04:00
|
|
|
if w != 0 && ((f != nil && p.From.Width < int64(w)) || (t != nil && p.To.Width > int64(w))) {
|
2015-02-13 14:40:36 -05:00
|
|
|
gc.Dump("f", f)
|
|
|
|
|
gc.Dump("t", t)
|
2015-08-30 23:10:03 +02:00
|
|
|
gc.Fatalf("bad width: %v (%d, %d)\n", p, p.From.Width, p.To.Width)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if p.To.Type == obj.TYPE_ADDR && w > 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
gc.Fatalf("bad use of addr: %v", p)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return p
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
func ginsnop() {
|
|
|
|
|
// This is actually not the x86 NOP anymore,
|
|
|
|
|
// but at the point where it gets used, AX is dead
|
|
|
|
|
// so it's okay if we lose the high bits.
|
|
|
|
|
var reg gc.Node
|
|
|
|
|
gc.Nodreg(®, gc.Types[gc.TINT], x86.REG_AX)
|
|
|
|
|
gins(x86.AXCHGL, ®, ®)
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* return Axxx for Oxxx on type t.
|
|
|
|
|
*/
|
2016-03-07 18:00:08 -08:00
|
|
|
func optoas(op gc.Op, t *gc.Type) obj.As {
|
2015-02-13 14:40:36 -05:00
|
|
|
if t == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
gc.Fatalf("optoas: t is nil")
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
// avoid constant conversions in switches below
|
|
|
|
|
const (
|
|
|
|
|
OMINUS_ = uint32(gc.OMINUS) << 16
|
|
|
|
|
OLSH_ = uint32(gc.OLSH) << 16
|
|
|
|
|
ORSH_ = uint32(gc.ORSH) << 16
|
|
|
|
|
OADD_ = uint32(gc.OADD) << 16
|
|
|
|
|
OSUB_ = uint32(gc.OSUB) << 16
|
|
|
|
|
OMUL_ = uint32(gc.OMUL) << 16
|
|
|
|
|
ODIV_ = uint32(gc.ODIV) << 16
|
|
|
|
|
OMOD_ = uint32(gc.OMOD) << 16
|
|
|
|
|
OOR_ = uint32(gc.OOR) << 16
|
|
|
|
|
OAND_ = uint32(gc.OAND) << 16
|
|
|
|
|
OXOR_ = uint32(gc.OXOR) << 16
|
|
|
|
|
OEQ_ = uint32(gc.OEQ) << 16
|
|
|
|
|
ONE_ = uint32(gc.ONE) << 16
|
|
|
|
|
OLT_ = uint32(gc.OLT) << 16
|
|
|
|
|
OLE_ = uint32(gc.OLE) << 16
|
|
|
|
|
OGE_ = uint32(gc.OGE) << 16
|
|
|
|
|
OGT_ = uint32(gc.OGT) << 16
|
|
|
|
|
OCMP_ = uint32(gc.OCMP) << 16
|
|
|
|
|
OPS_ = uint32(gc.OPS) << 16
|
|
|
|
|
OPC_ = uint32(gc.OPC) << 16
|
|
|
|
|
OAS_ = uint32(gc.OAS) << 16
|
|
|
|
|
OHMUL_ = uint32(gc.OHMUL) << 16
|
|
|
|
|
OSQRT_ = uint32(gc.OSQRT) << 16
|
|
|
|
|
OADDR_ = uint32(gc.OADDR) << 16
|
|
|
|
|
OINC_ = uint32(gc.OINC) << 16
|
|
|
|
|
ODEC_ = uint32(gc.ODEC) << 16
|
|
|
|
|
OLROT_ = uint32(gc.OLROT) << 16
|
|
|
|
|
ORROTC_ = uint32(gc.ORROTC) << 16
|
|
|
|
|
OEXTEND_ = uint32(gc.OEXTEND) << 16
|
|
|
|
|
)
|
|
|
|
|
|
2015-02-23 16:07:24 -05:00
|
|
|
a := obj.AXXX
|
2015-02-13 14:40:36 -05:00
|
|
|
switch uint32(op)<<16 | uint32(gc.Simtype[t.Etype]) {
|
|
|
|
|
default:
|
2016-03-07 08:23:55 -08:00
|
|
|
gc.Fatalf("optoas: no entry %v-%v", gc.Oconv(op, 0), t)
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADDR_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ALEAL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADDR_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ALEAQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OEQ_ | gc.TBOOL,
|
|
|
|
|
OEQ_ | gc.TINT8,
|
|
|
|
|
OEQ_ | gc.TUINT8,
|
|
|
|
|
OEQ_ | gc.TINT16,
|
|
|
|
|
OEQ_ | gc.TUINT16,
|
|
|
|
|
OEQ_ | gc.TINT32,
|
|
|
|
|
OEQ_ | gc.TUINT32,
|
|
|
|
|
OEQ_ | gc.TINT64,
|
|
|
|
|
OEQ_ | gc.TUINT64,
|
|
|
|
|
OEQ_ | gc.TPTR32,
|
|
|
|
|
OEQ_ | gc.TPTR64,
|
|
|
|
|
OEQ_ | gc.TFLOAT32,
|
|
|
|
|
OEQ_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJEQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ONE_ | gc.TBOOL,
|
|
|
|
|
ONE_ | gc.TINT8,
|
|
|
|
|
ONE_ | gc.TUINT8,
|
|
|
|
|
ONE_ | gc.TINT16,
|
|
|
|
|
ONE_ | gc.TUINT16,
|
|
|
|
|
ONE_ | gc.TINT32,
|
|
|
|
|
ONE_ | gc.TUINT32,
|
|
|
|
|
ONE_ | gc.TINT64,
|
|
|
|
|
ONE_ | gc.TUINT64,
|
|
|
|
|
ONE_ | gc.TPTR32,
|
|
|
|
|
ONE_ | gc.TPTR64,
|
|
|
|
|
ONE_ | gc.TFLOAT32,
|
|
|
|
|
ONE_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJNE
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OPS_ | gc.TBOOL,
|
|
|
|
|
OPS_ | gc.TINT8,
|
|
|
|
|
OPS_ | gc.TUINT8,
|
|
|
|
|
OPS_ | gc.TINT16,
|
|
|
|
|
OPS_ | gc.TUINT16,
|
|
|
|
|
OPS_ | gc.TINT32,
|
|
|
|
|
OPS_ | gc.TUINT32,
|
|
|
|
|
OPS_ | gc.TINT64,
|
|
|
|
|
OPS_ | gc.TUINT64,
|
|
|
|
|
OPS_ | gc.TPTR32,
|
|
|
|
|
OPS_ | gc.TPTR64,
|
|
|
|
|
OPS_ | gc.TFLOAT32,
|
|
|
|
|
OPS_ | gc.TFLOAT64:
|
2015-03-18 17:26:36 -04:00
|
|
|
a = x86.AJPS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OPC_ | gc.TBOOL,
|
|
|
|
|
OPC_ | gc.TINT8,
|
|
|
|
|
OPC_ | gc.TUINT8,
|
|
|
|
|
OPC_ | gc.TINT16,
|
|
|
|
|
OPC_ | gc.TUINT16,
|
|
|
|
|
OPC_ | gc.TINT32,
|
|
|
|
|
OPC_ | gc.TUINT32,
|
|
|
|
|
OPC_ | gc.TINT64,
|
|
|
|
|
OPC_ | gc.TUINT64,
|
|
|
|
|
OPC_ | gc.TPTR32,
|
|
|
|
|
OPC_ | gc.TPTR64,
|
|
|
|
|
OPC_ | gc.TFLOAT32,
|
|
|
|
|
OPC_ | gc.TFLOAT64:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
a = x86.AJPC
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLT_ | gc.TINT8,
|
|
|
|
|
OLT_ | gc.TINT16,
|
|
|
|
|
OLT_ | gc.TINT32,
|
|
|
|
|
OLT_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJLT
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLT_ | gc.TUINT8,
|
|
|
|
|
OLT_ | gc.TUINT16,
|
|
|
|
|
OLT_ | gc.TUINT32,
|
|
|
|
|
OLT_ | gc.TUINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJCS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLE_ | gc.TINT8,
|
|
|
|
|
OLE_ | gc.TINT16,
|
|
|
|
|
OLE_ | gc.TINT32,
|
|
|
|
|
OLE_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJLE
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLE_ | gc.TUINT8,
|
|
|
|
|
OLE_ | gc.TUINT16,
|
|
|
|
|
OLE_ | gc.TUINT32,
|
|
|
|
|
OLE_ | gc.TUINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJLS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OGT_ | gc.TINT8,
|
|
|
|
|
OGT_ | gc.TINT16,
|
|
|
|
|
OGT_ | gc.TINT32,
|
|
|
|
|
OGT_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJGT
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OGT_ | gc.TUINT8,
|
|
|
|
|
OGT_ | gc.TUINT16,
|
|
|
|
|
OGT_ | gc.TUINT32,
|
|
|
|
|
OGT_ | gc.TUINT64,
|
|
|
|
|
OLT_ | gc.TFLOAT32,
|
|
|
|
|
OLT_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJHI
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OGE_ | gc.TINT8,
|
|
|
|
|
OGE_ | gc.TINT16,
|
|
|
|
|
OGE_ | gc.TINT32,
|
|
|
|
|
OGE_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJGE
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OGE_ | gc.TUINT8,
|
|
|
|
|
OGE_ | gc.TUINT16,
|
|
|
|
|
OGE_ | gc.TUINT32,
|
|
|
|
|
OGE_ | gc.TUINT64,
|
|
|
|
|
OLE_ | gc.TFLOAT32,
|
|
|
|
|
OLE_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AJCC
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TBOOL,
|
|
|
|
|
OCMP_ | gc.TINT8,
|
|
|
|
|
OCMP_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACMPB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TINT16,
|
|
|
|
|
OCMP_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACMPW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TINT32,
|
|
|
|
|
OCMP_ | gc.TUINT32,
|
|
|
|
|
OCMP_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACMPL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TINT64,
|
|
|
|
|
OCMP_ | gc.TUINT64,
|
|
|
|
|
OCMP_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACMPQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AUCOMISS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OCMP_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AUCOMISD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TBOOL,
|
|
|
|
|
OAS_ | gc.TINT8,
|
|
|
|
|
OAS_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TINT16,
|
|
|
|
|
OAS_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TINT32,
|
|
|
|
|
OAS_ | gc.TUINT32,
|
|
|
|
|
OAS_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TINT64,
|
|
|
|
|
OAS_ | gc.TUINT64,
|
|
|
|
|
OAS_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVSS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAS_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMOVSD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TINT8,
|
|
|
|
|
OADD_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TINT16,
|
|
|
|
|
OADD_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TINT32,
|
|
|
|
|
OADD_ | gc.TUINT32,
|
|
|
|
|
OADD_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TINT64,
|
|
|
|
|
OADD_ | gc.TUINT64,
|
|
|
|
|
OADD_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDSS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OADD_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AADDSD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TINT8,
|
|
|
|
|
OSUB_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TINT16,
|
|
|
|
|
OSUB_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TINT32,
|
|
|
|
|
OSUB_ | gc.TUINT32,
|
|
|
|
|
OSUB_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TINT64,
|
|
|
|
|
OSUB_ | gc.TUINT64,
|
|
|
|
|
OSUB_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBSS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSUB_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASUBSD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OINC_ | gc.TINT8,
|
|
|
|
|
OINC_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AINCB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OINC_ | gc.TINT16,
|
|
|
|
|
OINC_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AINCW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OINC_ | gc.TINT32,
|
|
|
|
|
OINC_ | gc.TUINT32,
|
|
|
|
|
OINC_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AINCL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OINC_ | gc.TINT64,
|
|
|
|
|
OINC_ | gc.TUINT64,
|
|
|
|
|
OINC_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AINCQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODEC_ | gc.TINT8,
|
|
|
|
|
ODEC_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADECB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODEC_ | gc.TINT16,
|
|
|
|
|
ODEC_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADECW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODEC_ | gc.TINT32,
|
|
|
|
|
ODEC_ | gc.TUINT32,
|
|
|
|
|
ODEC_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADECL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODEC_ | gc.TINT64,
|
|
|
|
|
ODEC_ | gc.TUINT64,
|
|
|
|
|
ODEC_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADECQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMINUS_ | gc.TINT8,
|
|
|
|
|
OMINUS_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ANEGB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMINUS_ | gc.TINT16,
|
|
|
|
|
OMINUS_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ANEGW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMINUS_ | gc.TINT32,
|
|
|
|
|
OMINUS_ | gc.TUINT32,
|
|
|
|
|
OMINUS_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ANEGL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMINUS_ | gc.TINT64,
|
|
|
|
|
OMINUS_ | gc.TUINT64,
|
|
|
|
|
OMINUS_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ANEGQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAND_ | gc.TBOOL,
|
|
|
|
|
OAND_ | gc.TINT8,
|
|
|
|
|
OAND_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AANDB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAND_ | gc.TINT16,
|
|
|
|
|
OAND_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AANDW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAND_ | gc.TINT32,
|
|
|
|
|
OAND_ | gc.TUINT32,
|
|
|
|
|
OAND_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AANDL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OAND_ | gc.TINT64,
|
|
|
|
|
OAND_ | gc.TUINT64,
|
|
|
|
|
OAND_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AANDQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OOR_ | gc.TBOOL,
|
|
|
|
|
OOR_ | gc.TINT8,
|
|
|
|
|
OOR_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AORB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OOR_ | gc.TINT16,
|
|
|
|
|
OOR_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AORW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OOR_ | gc.TINT32,
|
|
|
|
|
OOR_ | gc.TUINT32,
|
|
|
|
|
OOR_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AORL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OOR_ | gc.TINT64,
|
|
|
|
|
OOR_ | gc.TUINT64,
|
|
|
|
|
OOR_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AORQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OXOR_ | gc.TINT8,
|
|
|
|
|
OXOR_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AXORB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OXOR_ | gc.TINT16,
|
|
|
|
|
OXOR_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AXORW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OXOR_ | gc.TINT32,
|
|
|
|
|
OXOR_ | gc.TUINT32,
|
|
|
|
|
OXOR_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AXORL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OXOR_ | gc.TINT64,
|
|
|
|
|
OXOR_ | gc.TUINT64,
|
|
|
|
|
OXOR_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AXORQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLROT_ | gc.TINT8,
|
|
|
|
|
OLROT_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AROLB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLROT_ | gc.TINT16,
|
|
|
|
|
OLROT_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AROLW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLROT_ | gc.TINT32,
|
|
|
|
|
OLROT_ | gc.TUINT32,
|
|
|
|
|
OLROT_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AROLL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLROT_ | gc.TINT64,
|
|
|
|
|
OLROT_ | gc.TUINT64,
|
|
|
|
|
OLROT_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AROLQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLSH_ | gc.TINT8,
|
|
|
|
|
OLSH_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHLB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLSH_ | gc.TINT16,
|
|
|
|
|
OLSH_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHLW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLSH_ | gc.TINT32,
|
|
|
|
|
OLSH_ | gc.TUINT32,
|
|
|
|
|
OLSH_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHLL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OLSH_ | gc.TINT64,
|
|
|
|
|
OLSH_ | gc.TUINT64,
|
|
|
|
|
OLSH_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHLQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHRB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHRW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TUINT32,
|
|
|
|
|
ORSH_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHRL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TUINT64,
|
|
|
|
|
ORSH_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASHRQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASARB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASARW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TINT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASARL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORSH_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ASARQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORROTC_ | gc.TINT8,
|
|
|
|
|
ORROTC_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ARCRB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORROTC_ | gc.TINT16,
|
|
|
|
|
ORROTC_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ARCRW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORROTC_ | gc.TINT32,
|
|
|
|
|
ORROTC_ | gc.TUINT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ARCRL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ORROTC_ | gc.TINT64,
|
|
|
|
|
ORROTC_ | gc.TUINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ARCRQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TINT8,
|
|
|
|
|
OMUL_ | gc.TINT8,
|
|
|
|
|
OMUL_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIMULB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TINT16,
|
|
|
|
|
OMUL_ | gc.TINT16,
|
|
|
|
|
OMUL_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIMULW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TINT32,
|
|
|
|
|
OMUL_ | gc.TINT32,
|
|
|
|
|
OMUL_ | gc.TUINT32,
|
|
|
|
|
OMUL_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIMULL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TINT64,
|
|
|
|
|
OMUL_ | gc.TINT64,
|
|
|
|
|
OMUL_ | gc.TUINT64,
|
|
|
|
|
OMUL_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIMULQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TUINT32,
|
|
|
|
|
OHMUL_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OHMUL_ | gc.TUINT64,
|
|
|
|
|
OHMUL_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMUL_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULSS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OMUL_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AMULSD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TINT8,
|
|
|
|
|
OMOD_ | gc.TINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIDIVB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TUINT8,
|
|
|
|
|
OMOD_ | gc.TUINT8:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVB
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TINT16,
|
|
|
|
|
OMOD_ | gc.TINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIDIVW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TUINT16,
|
|
|
|
|
OMOD_ | gc.TUINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVW
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TINT32,
|
|
|
|
|
OMOD_ | gc.TINT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIDIVL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TUINT32,
|
|
|
|
|
ODIV_ | gc.TPTR32,
|
|
|
|
|
OMOD_ | gc.TUINT32,
|
|
|
|
|
OMOD_ | gc.TPTR32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVL
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TINT64,
|
|
|
|
|
OMOD_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.AIDIVQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TUINT64,
|
|
|
|
|
ODIV_ | gc.TPTR64,
|
|
|
|
|
OMOD_ | gc.TUINT64,
|
|
|
|
|
OMOD_ | gc.TPTR64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OEXTEND_ | gc.TINT16:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACWD
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OEXTEND_ | gc.TINT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACDQ
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OEXTEND_ | gc.TINT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ACQO
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TFLOAT32:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVSS
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case ODIV_ | gc.TFLOAT64:
|
2015-02-13 14:40:36 -05:00
|
|
|
a = x86.ADIVSD
|
2015-04-01 16:02:34 -04:00
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
case OSQRT_ | gc.TFLOAT64:
|
2015-04-01 16:02:34 -04:00
|
|
|
a = x86.ASQRTSD
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return a
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
// jmptoset returns ASETxx for AJxx.
|
2016-03-07 18:00:08 -08:00
|
|
|
func jmptoset(jmp obj.As) obj.As {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
switch jmp {
|
|
|
|
|
case x86.AJEQ:
|
|
|
|
|
return x86.ASETEQ
|
|
|
|
|
case x86.AJNE:
|
|
|
|
|
return x86.ASETNE
|
|
|
|
|
case x86.AJLT:
|
|
|
|
|
return x86.ASETLT
|
|
|
|
|
case x86.AJCS:
|
|
|
|
|
return x86.ASETCS
|
|
|
|
|
case x86.AJLE:
|
|
|
|
|
return x86.ASETLE
|
|
|
|
|
case x86.AJLS:
|
|
|
|
|
return x86.ASETLS
|
|
|
|
|
case x86.AJGT:
|
|
|
|
|
return x86.ASETGT
|
|
|
|
|
case x86.AJHI:
|
|
|
|
|
return x86.ASETHI
|
|
|
|
|
case x86.AJGE:
|
|
|
|
|
return x86.ASETGE
|
|
|
|
|
case x86.AJCC:
|
|
|
|
|
return x86.ASETCC
|
|
|
|
|
case x86.AJMI:
|
|
|
|
|
return x86.ASETMI
|
|
|
|
|
case x86.AJOC:
|
|
|
|
|
return x86.ASETOC
|
|
|
|
|
case x86.AJOS:
|
|
|
|
|
return x86.ASETOS
|
|
|
|
|
case x86.AJPC:
|
|
|
|
|
return x86.ASETPC
|
|
|
|
|
case x86.AJPL:
|
|
|
|
|
return x86.ASETPL
|
|
|
|
|
case x86.AJPS:
|
|
|
|
|
return x86.ASETPS
|
|
|
|
|
}
|
2016-03-07 08:23:55 -08:00
|
|
|
gc.Fatalf("jmptoset: no entry for %v", jmp)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
panic("unreachable")
|
|
|
|
|
}
|
|
|
|
|
|
2015-02-13 14:40:36 -05:00
|
|
|
// Single-bit flags, one bit per constant so they can be OR-ed together.
// NOTE(review): neither flag is referenced in the nearby code; presumably
// they belong to the addressing helpers — confirm before removing.
const (
	ODynam   = 1 << iota // bit 0 (value 1)
	OAddable             // bit 1 (value 2)
)
|
|
|
|
|
|
|
|
|
|
var clean [20]gc.Node
|
|
|
|
|
|
|
|
|
|
var cleani int = 0
|
|
|
|
|
|
|
|
|
|
func sudoclean() {
|
|
|
|
|
if clean[cleani-1].Op != gc.OEMPTY {
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Regfree(&clean[cleani-1])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
if clean[cleani-2].Op != gc.OEMPTY {
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Regfree(&clean[cleani-2])
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
cleani -= 2
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* generate code to compute address of n,
|
|
|
|
|
* a reference to a (perhaps nested) field inside
|
|
|
|
|
* an array or struct.
|
|
|
|
|
* return 0 on failure, 1 on success.
|
|
|
|
|
* on success, leaves usable address in a.
|
|
|
|
|
*
|
|
|
|
|
* caller is responsible for calling sudoclean
|
|
|
|
|
* after successful sudoaddable,
|
|
|
|
|
* to release the register used for a.
|
|
|
|
|
*/
|
2016-03-07 18:00:08 -08:00
|
|
|
func sudoaddable(as obj.As, n *gc.Node, a *obj.Addr) bool {
|
2015-02-13 14:40:36 -05:00
|
|
|
if n.Type == nil {
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*a = obj.Addr{}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case gc.OLITERAL:
|
2015-02-17 22:13:49 -05:00
|
|
|
if !gc.Isconst(n, gc.CTINT) {
|
2015-02-13 14:40:36 -05:00
|
|
|
break
|
|
|
|
|
}
|
2015-04-22 20:08:03 -07:00
|
|
|
v := n.Int()
|
2015-02-13 14:40:36 -05:00
|
|
|
if v >= 32000 || v <= -32000 {
|
|
|
|
|
break
|
|
|
|
|
}
|
2015-03-02 12:35:15 -05:00
|
|
|
switch as {
|
|
|
|
|
default:
|
|
|
|
|
return false
|
|
|
|
|
|
|
|
|
|
case x86.AADDB,
|
|
|
|
|
x86.AADDW,
|
|
|
|
|
x86.AADDL,
|
|
|
|
|
x86.AADDQ,
|
|
|
|
|
x86.ASUBB,
|
|
|
|
|
x86.ASUBW,
|
|
|
|
|
x86.ASUBL,
|
|
|
|
|
x86.ASUBQ,
|
|
|
|
|
x86.AANDB,
|
|
|
|
|
x86.AANDW,
|
|
|
|
|
x86.AANDL,
|
|
|
|
|
x86.AANDQ,
|
|
|
|
|
x86.AORB,
|
|
|
|
|
x86.AORW,
|
|
|
|
|
x86.AORL,
|
|
|
|
|
x86.AORQ,
|
|
|
|
|
x86.AXORB,
|
|
|
|
|
x86.AXORW,
|
|
|
|
|
x86.AXORL,
|
|
|
|
|
x86.AXORQ,
|
|
|
|
|
x86.AINCB,
|
|
|
|
|
x86.AINCW,
|
|
|
|
|
x86.AINCL,
|
|
|
|
|
x86.AINCQ,
|
|
|
|
|
x86.ADECB,
|
|
|
|
|
x86.ADECW,
|
|
|
|
|
x86.ADECL,
|
|
|
|
|
x86.ADECQ,
|
|
|
|
|
x86.AMOVB,
|
|
|
|
|
x86.AMOVW,
|
|
|
|
|
x86.AMOVL,
|
|
|
|
|
x86.AMOVQ:
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cleani += 2
|
|
|
|
|
reg := &clean[cleani-1]
|
|
|
|
|
reg1 := &clean[cleani-2]
|
|
|
|
|
reg.Op = gc.OEMPTY
|
|
|
|
|
reg1.Op = gc.OEMPTY
|
2015-03-16 15:27:19 -04:00
|
|
|
gc.Naddr(a, n)
|
2015-03-02 12:35:15 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
|
|
|
|
|
case gc.ODOT,
|
|
|
|
|
gc.ODOTPTR:
|
|
|
|
|
cleani += 2
|
2015-03-02 12:35:15 -05:00
|
|
|
reg := &clean[cleani-1]
|
2015-02-23 16:07:24 -05:00
|
|
|
reg1 := &clean[cleani-2]
|
2015-02-13 14:40:36 -05:00
|
|
|
reg.Op = gc.OEMPTY
|
|
|
|
|
reg1.Op = gc.OEMPTY
|
2015-03-02 12:35:15 -05:00
|
|
|
var nn *gc.Node
|
|
|
|
|
var oary [10]int64
|
|
|
|
|
o := gc.Dotoffset(n, oary[:], &nn)
|
|
|
|
|
if nn == nil {
|
|
|
|
|
sudoclean()
|
|
|
|
|
return false
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if nn.Addable && o == 1 && oary[0] >= 0 {
|
2015-03-02 12:35:15 -05:00
|
|
|
// directly addressable set of DOTs
|
|
|
|
|
n1 := *nn
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 12:35:15 -05:00
|
|
|
n1.Type = n.Type
|
|
|
|
|
n1.Xoffset += oary[0]
|
2015-03-16 15:27:19 -04:00
|
|
|
gc.Naddr(a, &n1)
|
2015-03-02 12:35:15 -05:00
|
|
|
return true
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Regalloc(reg, gc.Types[gc.Tptr], nil)
|
2015-03-02 12:35:15 -05:00
|
|
|
n1 := *reg
|
|
|
|
|
n1.Op = gc.OINDREG
|
|
|
|
|
if oary[0] >= 0 {
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Agen(nn, reg)
|
2015-03-02 12:35:15 -05:00
|
|
|
n1.Xoffset = oary[0]
|
|
|
|
|
} else {
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Cgen(nn, reg)
|
2015-03-02 12:35:15 -05:00
|
|
|
gc.Cgen_checknil(reg)
|
|
|
|
|
n1.Xoffset = -(oary[0] + 1)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 12:35:15 -05:00
|
|
|
for i := 1; i < o; i++ {
|
|
|
|
|
if oary[i] >= 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
gc.Fatalf("can't happen")
|
2015-03-02 12:35:15 -05:00
|
|
|
}
|
|
|
|
|
gins(movptr, &n1, reg)
|
|
|
|
|
gc.Cgen_checknil(reg)
|
|
|
|
|
n1.Xoffset = -(oary[i] + 1)
|
|
|
|
|
}
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 12:35:15 -05:00
|
|
|
a.Type = obj.TYPE_NONE
|
2015-05-14 20:11:28 -07:00
|
|
|
a.Index = x86.REG_NONE
|
2015-03-18 17:26:36 -04:00
|
|
|
gc.Fixlargeoffset(&n1)
|
2015-03-16 15:27:19 -04:00
|
|
|
gc.Naddr(a, &n1)
|
2015-03-02 12:35:15 -05:00
|
|
|
return true
|
2015-02-13 14:40:36 -05:00
|
|
|
|
2015-03-02 12:35:15 -05:00
|
|
|
case gc.OINDEX:
|
|
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|
|
|
|
|
|
2015-02-17 22:13:49 -05:00
|
|
|
return false
|
2015-02-13 14:40:36 -05:00
|
|
|
}
|