2015-03-18 17:26:36 -04:00
|
|
|
// Copyright 2009 The Go Authors. All rights reserved.
|
|
|
|
|
// Use of this source code is governed by a BSD-style
|
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
|
|
package gc
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"cmd/internal/obj"
|
cmd/compile, cmd/link, runtime: on ppc64x, maintain the TOC pointer in R2 when compiling PIC
The PowerPC ISA does not have a PC-relative load instruction, which poses
obvious challenges when generating position-independent code. The way the ELFv2
ABI addresses this is to specify that r2 points to a per "module" (shared
library or executable) TOC pointer. Maintaining this pointer requires
cooperation between codegen and the system linker:
* Non-leaf functions leave space on the stack at r1+24 to save the TOC pointer.
* A call to a function that *might* have to go via a PLT stub must be followed
by a nop instruction that the system linker can replace with "ld r2, 24(r1)"
to restore the TOC pointer (only when dynamically linking Go code).
* When calling a function via a function pointer, the address of the function
must be in r12, and the first couple of instructions (the "global entry
point") of the called function use this to derive the address of the TOC
for the module it is in.
* When calling a function that is implemented in the same module, the system
linker adjusts the call to skip over the instructions mentioned above (the
"local entry point"), assuming that r2 is already correctly set.
So this changeset adds the global entry point instructions, sets the metadata so
the system linker knows where the local entry point is, inserts code to save the
TOC pointer at 24(r1), adds a nop after any call not known to be local and copes
with the odd non-local code transfer in the runtime (e.g. the stuff around
jmpdefer). It does not actually compile PIC yet.
Change-Id: I7522e22bdfd2f891745a900c60254fe9e372c854
Reviewed-on: https://go-review.googlesource.com/15967
Reviewed-by: Russ Cox <rsc@golang.org>
2015-10-16 15:42:09 +13:00
|
|
|
"cmd/internal/obj/ppc64"
|
2015-03-18 17:26:36 -04:00
|
|
|
"fmt"
|
|
|
|
|
)
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// generate:
|
|
|
|
|
// res = n;
|
|
|
|
|
// simplifies and calls Thearch.Gmove.
|
|
|
|
|
// If wb is true (the wb parameter of cgen_wb), write barriers must be emitted.
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
func Cgen(n, res *Node) {
|
|
|
|
|
cgen_wb(n, res, false)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func cgen_wb(n, res *Node, wb bool) {
|
2015-03-18 17:26:36 -04:00
|
|
|
if Debug['g'] != 0 {
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
op := "cgen"
|
|
|
|
|
if wb {
|
|
|
|
|
op = "cgen_wb"
|
|
|
|
|
}
|
|
|
|
|
Dump("\n"+op+"-n", n)
|
|
|
|
|
Dump(op+"-res", res)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n == nil || n.Type == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if res == nil || res.Type == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen: res nil")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for n.Op == OCONVNOP {
|
|
|
|
|
n = n.Left
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
cgen_slice(n, res, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OEFACE:
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if res.Op != ONAME || !res.Addable || wb {
|
2015-03-18 17:26:36 -04:00
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen_eface(n, &n1)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(&n1, res, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
} else {
|
|
|
|
|
Cgen_eface(n, res)
|
|
|
|
|
}
|
|
|
|
|
return
|
2015-03-20 00:06:10 -04:00
|
|
|
|
|
|
|
|
case ODOTTYPE:
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_dottype(n, res, nil, wb)
|
2015-03-20 00:06:10 -04:00
|
|
|
return
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
|
|
|
|
|
case OAPPEND:
|
|
|
|
|
cgen_append(n, res)
|
|
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Ullman >= UINF {
|
|
|
|
|
if n.Op == OINDREG {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen: this is going to miscompile")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
if res.Ullman >= UINF {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen(n, &n1)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(&n1, res, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Isfat(n.Type) {
|
|
|
|
|
if n.Type.Width < 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("forgot to compute width for %v", n.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
sgen_wb(n, res, n.Type.Width, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !res.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
if n.Ullman > res.Ullman {
|
|
|
|
|
if Ctxt.Arch.Regsize == 4 && Is64(n.Type) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen(n, &n1)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(&n1, res, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, n.Type, res)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
if n1.Ullman > res.Ullman {
|
|
|
|
|
Dump("n1", &n1)
|
|
|
|
|
Dump("res", res)
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("loop in cgen")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(&n1, res, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
Regfree(&n1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var f int
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if res.Ullman < UINF {
|
|
|
|
|
if Complexop(n, res) {
|
|
|
|
|
Complexgen(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
f = 1 // gen thru register
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OLITERAL:
|
|
|
|
|
if Smallintconst(n) {
|
|
|
|
|
f = 0
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
case OREGISTER:
|
2015-03-18 17:26:36 -04:00
|
|
|
f = 0
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if !Iscomplex[n.Type.Etype] && Ctxt.Arch.Regsize == 8 && !wb {
|
|
|
|
|
a := Thearch.Optoas(OAS, res.Type)
|
|
|
|
|
var addr obj.Addr
|
|
|
|
|
if Thearch.Sudoaddable(a, res, &addr) {
|
|
|
|
|
var p1 *obj.Prog
|
|
|
|
|
if f != 0 {
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, res.Type, nil)
|
|
|
|
|
Cgen(n, &n2)
|
|
|
|
|
p1 = Thearch.Gins(a, &n2, nil)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
} else {
|
|
|
|
|
p1 = Thearch.Gins(a, n, nil)
|
|
|
|
|
}
|
|
|
|
|
p1.To = addr
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
fmt.Printf("%v [ignore previous line]\n", p1)
|
|
|
|
|
}
|
|
|
|
|
Thearch.Sudoclean()
|
|
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Ctxt.Arch.Thechar == '8' {
|
|
|
|
|
// no registers to speak of
|
|
|
|
|
var n1, n2 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
Igen(res, &n2, nil)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(&n1, &n2, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
Regfree(&n2)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(res, &n1, nil)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
cgen_wb(n, &n1, wb)
|
2015-03-18 17:26:36 -04:00
|
|
|
Regfree(&n1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// update addressability for string, slice
|
|
|
|
|
// can't do in walk because n.Left.Addable
|
|
|
|
|
// changes if n.Left is an escaping local variable.
|
|
|
|
|
switch n.Op {
|
2015-04-01 09:38:44 -07:00
|
|
|
case OSPTR, OLEN:
|
2015-03-18 17:26:36 -04:00
|
|
|
if Isslice(n.Left.Type) || Istype(n.Left.Type, TSTRING) {
|
|
|
|
|
n.Addable = n.Left.Addable
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OCAP:
|
|
|
|
|
if Isslice(n.Left.Type) {
|
|
|
|
|
n.Addable = n.Left.Addable
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OITAB:
|
|
|
|
|
n.Addable = n.Left.Addable
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if wb {
|
2015-09-24 23:21:18 +02:00
|
|
|
if Simtype[res.Type.Etype] != Tptr {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_wb of type %v", res.Type)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
}
|
|
|
|
|
if n.Ullman >= UINF {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
n = &n1
|
|
|
|
|
}
|
|
|
|
|
cgen_wbptr(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write barrier now handled. Code below this line can ignore wb.
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
if Ctxt.Arch.Thechar == '5' { // TODO(rsc): Maybe more often?
|
|
|
|
|
// if both are addressable, move
|
2015-04-02 19:58:37 -07:00
|
|
|
if n.Addable && res.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
if Is64(n.Type) || Is64(res.Type) || n.Op == OREGISTER || res.Op == OREGISTER || Iscomplex[n.Type.Etype] || Iscomplex[res.Type.Etype] {
|
|
|
|
|
Thearch.Gmove(n, res)
|
|
|
|
|
} else {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, n.Type, nil)
|
|
|
|
|
Thearch.Gmove(n, &n1)
|
|
|
|
|
Cgen(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if both are not addressable, use a temporary.
|
2015-04-02 19:58:37 -07:00
|
|
|
if !n.Addable && !res.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
// could use regalloc here sometimes,
|
|
|
|
|
// but have to check for ullman >= UINF.
|
|
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, n.Type)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
Cgen(&n1, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if result is not addressable directly but n is,
|
|
|
|
|
// compute its address and then store via the address.
|
2015-04-02 19:58:37 -07:00
|
|
|
if !res.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
var n1 Node
|
|
|
|
|
Igen(res, &n1, nil)
|
|
|
|
|
Cgen(n, &n1)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Complexop(n, res) {
|
|
|
|
|
Complexgen(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if (Ctxt.Arch.Thechar == '6' || Ctxt.Arch.Thechar == '8') && n.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
Thearch.Gmove(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-10 11:33:09 -04:00
|
|
|
if Ctxt.Arch.Thechar == '0' || Ctxt.Arch.Thechar == '7' || Ctxt.Arch.Thechar == '9' {
|
2015-03-18 17:26:36 -04:00
|
|
|
// if both are addressable, move
|
2015-04-02 19:58:37 -07:00
|
|
|
if n.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
if n.Op == OREGISTER || res.Op == OREGISTER {
|
|
|
|
|
Thearch.Gmove(n, res)
|
|
|
|
|
} else {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, n.Type, nil)
|
|
|
|
|
Thearch.Gmove(n, &n1)
|
|
|
|
|
Cgen(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if n is sudoaddable generate addr and move
|
|
|
|
|
if Ctxt.Arch.Thechar == '5' && !Is64(n.Type) && !Is64(res.Type) && !Iscomplex[n.Type.Etype] && !Iscomplex[res.Type.Etype] {
|
|
|
|
|
a := Thearch.Optoas(OAS, n.Type)
|
|
|
|
|
var addr obj.Addr
|
|
|
|
|
if Thearch.Sudoaddable(a, n, &addr) {
|
|
|
|
|
if res.Op != OREGISTER {
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, res.Type, nil)
|
|
|
|
|
p1 := Thearch.Gins(a, nil, &n2)
|
|
|
|
|
p1.From = addr
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
fmt.Printf("%v [ignore previous line]\n", p1)
|
|
|
|
|
}
|
|
|
|
|
Thearch.Gmove(&n2, res)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
} else {
|
|
|
|
|
p1 := Thearch.Gins(a, nil, res)
|
|
|
|
|
p1.From = addr
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
fmt.Printf("%v [ignore previous line]\n", p1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Thearch.Sudoclean()
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nl := n.Left
|
|
|
|
|
nr := n.Right
|
|
|
|
|
|
|
|
|
|
if nl != nil && nl.Ullman >= UINF {
|
|
|
|
|
if nr != nil && nr.Ullman >= UINF {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Tempname(&n1, nl.Type)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
n2 := *n
|
|
|
|
|
n2.Left = &n1
|
|
|
|
|
Cgen(&n2, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// 64-bit ops are hard on 32-bit machine.
|
|
|
|
|
if Ctxt.Arch.Regsize == 4 && (Is64(n.Type) || Is64(res.Type) || n.Left != nil && Is64(n.Left.Type)) {
|
|
|
|
|
switch n.Op {
|
|
|
|
|
// math goes to cgen64.
|
|
|
|
|
case OMINUS,
|
|
|
|
|
OCOM,
|
|
|
|
|
OADD,
|
|
|
|
|
OSUB,
|
|
|
|
|
OMUL,
|
|
|
|
|
OLROT,
|
|
|
|
|
OLSH,
|
|
|
|
|
ORSH,
|
|
|
|
|
OAND,
|
|
|
|
|
OOR,
|
|
|
|
|
OXOR:
|
|
|
|
|
Thearch.Cgen64(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Thearch.Cgen_float != nil && nl != nil && Isfloat[n.Type.Etype] && Isfloat[nl.Type.Etype] {
|
|
|
|
|
Thearch.Cgen_float(n, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !Iscomplex[n.Type.Etype] && Ctxt.Arch.Regsize == 8 {
|
|
|
|
|
a := Thearch.Optoas(OAS, n.Type)
|
|
|
|
|
var addr obj.Addr
|
|
|
|
|
if Thearch.Sudoaddable(a, n, &addr) {
|
|
|
|
|
if res.Op == OREGISTER {
|
|
|
|
|
p1 := Thearch.Gins(a, nil, res)
|
|
|
|
|
p1.From = addr
|
|
|
|
|
} else {
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, n.Type, nil)
|
|
|
|
|
p1 := Thearch.Gins(a, nil, &n2)
|
|
|
|
|
p1.From = addr
|
|
|
|
|
Thearch.Gins(a, &n2, res)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Thearch.Sudoclean()
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var a int
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
|
|
|
|
Dump("cgen", n)
|
|
|
|
|
Dump("cgen-res", res)
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
case OOROR, OANDAND,
|
|
|
|
|
OEQ, ONE,
|
|
|
|
|
OLT, OLE,
|
|
|
|
|
OGE, OGT,
|
2015-03-18 17:26:36 -04:00
|
|
|
ONOT:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
Bvgen(n, res, true)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OPLUS:
|
|
|
|
|
Cgen(nl, res)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
// unary
|
|
|
|
|
case OCOM:
|
|
|
|
|
a := Thearch.Optoas(OXOR, nl.Type)
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, nil)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, nl.Type, -1)
|
|
|
|
|
Thearch.Gins(a, &n2, &n1)
|
|
|
|
|
cgen_norm(n, &n1, res)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OMINUS:
|
|
|
|
|
if Isfloat[nl.Type.Etype] {
|
|
|
|
|
nr = Nodintconst(-1)
|
|
|
|
|
Convlit(&nr, n.Type)
|
|
|
|
|
a = Thearch.Optoas(OMUL, nl.Type)
|
|
|
|
|
goto sbop
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
a := Thearch.Optoas(n.Op, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
// unary
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
if Ctxt.Arch.Thechar == '5' {
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, nl.Type, 0)
|
|
|
|
|
Thearch.Gins(a, &n2, &n1)
|
|
|
|
|
} else if Ctxt.Arch.Thechar == '7' {
|
|
|
|
|
Thearch.Gins(a, &n1, &n1)
|
|
|
|
|
} else {
|
|
|
|
|
Thearch.Gins(a, nil, &n1)
|
|
|
|
|
}
|
|
|
|
|
cgen_norm(n, &n1, res)
|
|
|
|
|
return
|
|
|
|
|
|
2015-04-01 16:02:34 -04:00
|
|
|
case OSQRT:
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Cgen(n.Left, &n1)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OSQRT, nl.Type), &n1, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
return
|
|
|
|
|
|
2015-04-03 12:23:28 -04:00
|
|
|
case OGETG:
|
|
|
|
|
Thearch.Getg(res)
|
|
|
|
|
return
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
// symmetric binary
|
|
|
|
|
case OAND,
|
|
|
|
|
OOR,
|
|
|
|
|
OXOR,
|
|
|
|
|
OADD,
|
|
|
|
|
OMUL:
|
2015-09-24 23:21:18 +02:00
|
|
|
if n.Op == OMUL && Thearch.Cgen_bmul != nil && Thearch.Cgen_bmul(n.Op, nl, nr, res) {
|
2015-03-18 17:26:36 -04:00
|
|
|
break
|
|
|
|
|
}
|
2015-09-24 23:21:18 +02:00
|
|
|
a = Thearch.Optoas(n.Op, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
goto sbop
|
|
|
|
|
|
|
|
|
|
// asymmetric binary
|
|
|
|
|
case OSUB:
|
2015-09-24 23:21:18 +02:00
|
|
|
a = Thearch.Optoas(n.Op, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
goto abop
|
|
|
|
|
|
|
|
|
|
case OHMUL:
|
|
|
|
|
Thearch.Cgen_hmul(nl, nr, res)
|
|
|
|
|
|
|
|
|
|
case OCONV:
|
|
|
|
|
if Eqtype(n.Type, nl.Type) || Noconv(n.Type, nl.Type) {
|
|
|
|
|
Cgen(nl, res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Ctxt.Arch.Thechar == '8' {
|
|
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
Tempname(&n2, n.Type)
|
|
|
|
|
Mgen(nl, &n1, res)
|
|
|
|
|
Thearch.Gmove(&n1, &n2)
|
|
|
|
|
Thearch.Gmove(&n2, res)
|
|
|
|
|
Mfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
if Ctxt.Arch.Thechar == '5' {
|
2015-04-02 19:58:37 -07:00
|
|
|
if nl.Addable && !Is64(nl.Type) {
|
2015-03-18 17:26:36 -04:00
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Thearch.Gmove(nl, &n1)
|
|
|
|
|
} else {
|
|
|
|
|
if n.Type.Width > int64(Widthptr) || Is64(nl.Type) || Isfloat[nl.Type.Etype] {
|
|
|
|
|
Tempname(&n1, nl.Type)
|
|
|
|
|
} else {
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
}
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
}
|
|
|
|
|
if n.Type.Width > int64(Widthptr) || Is64(n.Type) || Isfloat[n.Type.Etype] {
|
|
|
|
|
Tempname(&n2, n.Type)
|
|
|
|
|
} else {
|
|
|
|
|
Regalloc(&n2, n.Type, nil)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
if n.Type.Width > nl.Type.Width {
|
|
|
|
|
// If loading from memory, do conversion during load,
|
|
|
|
|
// so as to avoid use of 8-bit register in, say, int(*byteptr).
|
|
|
|
|
switch nl.Op {
|
|
|
|
|
case ODOT, ODOTPTR, OINDEX, OIND, ONAME:
|
|
|
|
|
Igen(nl, &n1, res)
|
|
|
|
|
Regalloc(&n2, n.Type, res)
|
|
|
|
|
Thearch.Gmove(&n1, &n2)
|
|
|
|
|
Thearch.Gmove(&n2, res)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Regalloc(&n2, n.Type, &n1)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// if we do the conversion n1 -> n2 here
|
|
|
|
|
// reusing the register, then gmove won't
|
|
|
|
|
// have to allocate its own register.
|
|
|
|
|
Thearch.Gmove(&n1, &n2)
|
|
|
|
|
Thearch.Gmove(&n2, res)
|
|
|
|
|
if n2.Op == OREGISTER {
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
if n1.Op == OREGISTER {
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case ODOT,
|
|
|
|
|
ODOTPTR,
|
|
|
|
|
OINDEX,
|
|
|
|
|
OIND,
|
|
|
|
|
ONAME: // PHEAP or PPARAMREF var
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(n, &n1, res)
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
|
|
|
|
|
// interface table is first word of interface value
|
|
|
|
|
case OITAB:
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(nl, &n1, res)
|
|
|
|
|
|
|
|
|
|
n1.Type = n.Type
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
|
|
|
|
|
case OSPTR:
|
|
|
|
|
// pointer is the first word of string or slice.
|
|
|
|
|
if Isconst(nl, CTSTR) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, Types[Tptr], res)
|
|
|
|
|
p1 := Thearch.Gins(Thearch.Optoas(OAS, n1.Type), nil, &n1)
|
2015-05-27 00:47:05 -04:00
|
|
|
Datastring(nl.Val().U.(string), &p1.From)
|
2015-03-18 17:26:36 -04:00
|
|
|
p1.From.Type = obj.TYPE_ADDR
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(nl, &n1, res)
|
|
|
|
|
n1.Type = n.Type
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
|
|
|
|
|
case OLEN:
|
|
|
|
|
if Istype(nl.Type, TMAP) || Istype(nl.Type, TCHAN) {
|
|
|
|
|
// map and chan have len in the first int-sized word.
|
|
|
|
|
// a zero pointer means zero length
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, Types[Tptr], res)
|
|
|
|
|
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, Types[Tptr], 0)
|
2015-05-06 12:28:19 -04:00
|
|
|
p1 := Thearch.Ginscmp(OEQ, Types[Tptr], &n1, &n2, 0)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
n2 = n1
|
|
|
|
|
n2.Op = OINDREG
|
|
|
|
|
n2.Type = Types[Simtype[TINT]]
|
|
|
|
|
Thearch.Gmove(&n2, &n1)
|
|
|
|
|
|
|
|
|
|
Patch(p1, Pc)
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Istype(nl.Type, TSTRING) || Isslice(nl.Type) {
|
|
|
|
|
// both slice and string have len one pointer into the struct.
|
|
|
|
|
// a zero pointer means zero length
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(nl, &n1, res)
|
|
|
|
|
|
|
|
|
|
n1.Type = Types[Simtype[TUINT]]
|
|
|
|
|
n1.Xoffset += int64(Array_nel)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen: OLEN: unknown type %v", Tconv(nl.Type, obj.FmtLong))
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
case OCAP:
|
|
|
|
|
if Istype(nl.Type, TCHAN) {
|
|
|
|
|
// chan has cap in the second int-sized word.
|
|
|
|
|
// a zero pointer means zero length
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, Types[Tptr], res)
|
|
|
|
|
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, Types[Tptr], 0)
|
2015-05-06 12:28:19 -04:00
|
|
|
p1 := Thearch.Ginscmp(OEQ, Types[Tptr], &n1, &n2, 0)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
n2 = n1
|
|
|
|
|
n2.Op = OINDREG
|
|
|
|
|
n2.Xoffset = int64(Widthint)
|
|
|
|
|
n2.Type = Types[Simtype[TINT]]
|
|
|
|
|
Thearch.Gmove(&n2, &n1)
|
|
|
|
|
|
|
|
|
|
Patch(p1, Pc)
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Isslice(nl.Type) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(nl, &n1, res)
|
|
|
|
|
n1.Type = Types[Simtype[TUINT]]
|
|
|
|
|
n1.Xoffset += int64(Array_cap)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen: OCAP: unknown type %v", Tconv(nl.Type, obj.FmtLong))
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
case OADDR:
|
|
|
|
|
if n.Bounded { // let race detector avoid nil checks
|
|
|
|
|
Disable_checknil++
|
|
|
|
|
}
|
|
|
|
|
Agen(nl, res)
|
|
|
|
|
if n.Bounded {
|
|
|
|
|
Disable_checknil--
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case OCALLMETH:
|
|
|
|
|
cgen_callmeth(n, 0)
|
|
|
|
|
cgen_callret(n, res)
|
|
|
|
|
|
|
|
|
|
case OCALLINTER:
|
|
|
|
|
cgen_callinter(n, res, 0)
|
|
|
|
|
cgen_callret(n, res)
|
|
|
|
|
|
|
|
|
|
case OCALLFUNC:
|
|
|
|
|
cgen_call(n, 0)
|
|
|
|
|
cgen_callret(n, res)
|
|
|
|
|
|
|
|
|
|
case OMOD, ODIV:
|
|
|
|
|
if Isfloat[n.Type.Etype] || Thearch.Dodiv == nil {
|
2015-09-24 23:21:18 +02:00
|
|
|
a = Thearch.Optoas(n.Op, nl.Type)
|
2015-03-18 17:26:36 -04:00
|
|
|
goto abop
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if nl.Ullman >= nr.Ullman {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Cgen(nl, &n1)
|
2015-09-24 23:21:18 +02:00
|
|
|
cgen_div(n.Op, &n1, nr, res)
|
2015-03-18 17:26:36 -04:00
|
|
|
Regfree(&n1)
|
|
|
|
|
} else {
|
|
|
|
|
var n2 Node
|
|
|
|
|
if !Smallintconst(nr) {
|
|
|
|
|
Regalloc(&n2, nr.Type, res)
|
|
|
|
|
Cgen(nr, &n2)
|
|
|
|
|
} else {
|
|
|
|
|
n2 = *nr
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
cgen_div(n.Op, nl, &n2, res)
|
2015-03-18 17:26:36 -04:00
|
|
|
if n2.Op != OLITERAL {
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OLSH, ORSH, OLROT:
|
2015-09-24 23:21:18 +02:00
|
|
|
Thearch.Cgen_shift(n.Op, n.Bounded, nl, nr, res)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// put simplest on right - we'll generate into left
|
|
|
|
|
// and then adjust it using the computation of right.
|
|
|
|
|
// constants and variables have the same ullman
|
|
|
|
|
// count, so look for constants specially.
|
|
|
|
|
//
|
|
|
|
|
// an integer constant we can use as an immediate
|
|
|
|
|
// is simpler than a variable - we can use the immediate
|
|
|
|
|
// in the adjustment instruction directly - so it goes
|
|
|
|
|
// on the right.
|
|
|
|
|
//
|
|
|
|
|
// other constants, like big integers or floating point
|
|
|
|
|
// constants, require a mov into a register, so those
|
|
|
|
|
// might as well go on the left, so we can reuse that
|
|
|
|
|
// register for the computation.
|
2015-03-18 17:26:36 -04:00
|
|
|
sbop: // symmetric binary
|
|
|
|
|
if nl.Ullman < nr.Ullman || (nl.Ullman == nr.Ullman && (Smallintconst(nl) || (nr.Op == OLITERAL && !Smallintconst(nr)))) {
|
2015-09-06 16:59:57 +02:00
|
|
|
nl, nr = nr, nl
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
abop: // asymmetric binary
|
|
|
|
|
var n1 Node
|
|
|
|
|
var n2 Node
|
|
|
|
|
if Ctxt.Arch.Thechar == '8' {
|
|
|
|
|
// no registers, sigh
|
|
|
|
|
if Smallintconst(nr) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Mgen(nl, &n1, res)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, nl.Type, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, &n2)
|
|
|
|
|
Thearch.Gins(a, nr, &n2)
|
|
|
|
|
Thearch.Gmove(&n2, res)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
Mfree(&n1)
|
|
|
|
|
} else if nl.Ullman >= nr.Ullman {
|
|
|
|
|
var nt Node
|
|
|
|
|
Tempname(&nt, nl.Type)
|
|
|
|
|
Cgen(nl, &nt)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Mgen(nr, &n2, nil)
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Thearch.Gmove(&nt, &n1)
|
|
|
|
|
Thearch.Gins(a, &n2, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
Mfree(&n2)
|
|
|
|
|
} else {
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, nr.Type, res)
|
|
|
|
|
Cgen(nr, &n2)
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, nil)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
Thearch.Gins(a, &n2, &n1)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if nl.Ullman >= nr.Ullman {
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
|
2015-09-10 11:33:09 -04:00
|
|
|
if Smallintconst(nr) && Ctxt.Arch.Thechar != '0' && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' { // TODO(rsc): Check opcode for arm
|
2015-03-18 17:26:36 -04:00
|
|
|
n2 = *nr
|
|
|
|
|
} else {
|
|
|
|
|
Regalloc(&n2, nr.Type, nil)
|
|
|
|
|
Cgen(nr, &n2)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
2015-09-10 11:33:09 -04:00
|
|
|
if Smallintconst(nr) && Ctxt.Arch.Thechar != '0' && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' { // TODO(rsc): Check opcode for arm
|
2015-03-18 17:26:36 -04:00
|
|
|
n2 = *nr
|
|
|
|
|
} else {
|
|
|
|
|
Regalloc(&n2, nr.Type, res)
|
|
|
|
|
Cgen(nr, &n2)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Regalloc(&n1, nl.Type, nil)
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Thearch.Gins(a, &n2, &n1)
|
|
|
|
|
if n2.Op != OLITERAL {
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
cgen_norm(n, &n1, res)
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
// sys_wbptr is package-level state declared alongside cgen_wbptr.
// NOTE(review): presumably a lazily initialized Node for the runtime
// pointer write-barrier routine that cgen_wbptr calls; the assignment is
// not visible near this declaration — confirm against the rest of cgen_wbptr.
var sys_wbptr *Node
|
|
|
|
|
|
|
|
|
|
func cgen_wbptr(n, res *Node) {
|
2015-11-02 16:45:07 -05:00
|
|
|
if Curfn != nil {
|
2016-02-26 13:32:28 -08:00
|
|
|
if Curfn.Func.Pragma&Nowritebarrier != 0 {
|
2015-11-02 16:45:07 -05:00
|
|
|
Yyerror("write barrier prohibited")
|
|
|
|
|
}
|
|
|
|
|
if Curfn.Func.WBLineno == 0 {
|
|
|
|
|
Curfn.Func.WBLineno = lineno
|
|
|
|
|
}
|
2015-05-19 15:15:52 -04:00
|
|
|
}
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if Debug_wb > 0 {
|
|
|
|
|
Warn("write barrier")
|
|
|
|
|
}
|
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr
I believe the benchmarks that get slower are under register pressure,
and not making the call unconditionally makes the pressure worse,
and the register allocator doesn't do a great job. But part of the point
of this sequence is to get the write barriers out of the way so I can work
on the register allocator, so that's okay.
name old new delta
BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~
BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~
BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00%
BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88%
BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84%
BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26%
BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37%
BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~
BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54%
BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~
BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~
BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~
BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23%
BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~
BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~
BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27%
BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65%
BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17%
BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55%
BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38%
BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~
BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30%
BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31%
BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~
Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2
Reviewed-on: https://go-review.googlesource.com/9224
Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
var dst, src Node
|
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr
I believe the benchmarks that get slower are under register pressure,
and not making the call unconditionally makes the pressure worse,
and the register allocator doesn't do a great job. But part of the point
of this sequence is to get the write barriers out of the way so I can work
on the register allocator, so that's okay.
name old new delta
BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~
BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~
BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00%
BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88%
BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84%
BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26%
BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37%
BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~
BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54%
BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~
BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~
BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~
BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23%
BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~
BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~
BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27%
BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65%
BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17%
BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55%
BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38%
BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~
BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30%
BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31%
BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~
Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2
Reviewed-on: https://go-review.googlesource.com/9224
Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
|
|
|
Igen(res, &dst, nil)
|
|
|
|
|
if n.Op == OREGISTER {
|
|
|
|
|
src = *n
|
|
|
|
|
Regrealloc(&src)
|
|
|
|
|
} else {
|
|
|
|
|
Cgenr(n, &src, nil)
|
|
|
|
|
}
|
|
|
|
|
|
2016-03-04 15:19:06 -08:00
|
|
|
wbVar := syslook("writeBarrier")
|
2015-11-13 17:45:22 -08:00
|
|
|
wbEnabled := Nod(ODOT, wbVar, newname(wbVar.Type.Type.Sym))
|
|
|
|
|
wbEnabled = typecheck(&wbEnabled, Erv)
|
2015-05-06 12:28:19 -04:00
|
|
|
pbr := Thearch.Ginscmp(ONE, Types[TUINT8], wbEnabled, Nodintconst(0), -1)
|
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr
I believe the benchmarks that get slower are under register pressure,
and not making the call unconditionally makes the pressure worse,
and the register allocator doesn't do a great job. But part of the point
of this sequence is to get the write barriers out of the way so I can work
on the register allocator, so that's okay.
name old new delta
BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~
BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~
BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00%
BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88%
BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84%
BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26%
BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37%
BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~
BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54%
BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~
BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~
BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~
BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23%
BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~
BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~
BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27%
BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65%
BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17%
BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55%
BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38%
BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~
BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30%
BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31%
BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~
Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2
Reviewed-on: https://go-review.googlesource.com/9224
Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, &dst)
|
|
|
|
|
pjmp := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(pbr, Pc)
|
|
|
|
|
var adst Node
|
|
|
|
|
Agenr(&dst, &adst, &dst)
|
|
|
|
|
p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &adst, nil)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
a := &p.To
|
|
|
|
|
a.Type = obj.TYPE_MEM
|
|
|
|
|
a.Reg = int16(Thearch.REGSP)
|
2015-10-08 22:13:44 +13:00
|
|
|
a.Offset = Ctxt.FixedFrameSize()
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
|
|
|
|
|
p2.To = p.To
|
|
|
|
|
p2.To.Offset += int64(Widthptr)
|
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr
I believe the benchmarks that get slower are under register pressure,
and not making the call unconditionally makes the pressure worse,
and the register allocator doesn't do a great job. But part of the point
of this sequence is to get the write barriers out of the way so I can work
on the register allocator, so that's okay.
name old new delta
BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~
BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~
BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00%
BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88%
BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84%
BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26%
BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37%
BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~
BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54%
BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~
BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~
BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~
BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23%
BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~
BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~
BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27%
BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65%
BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17%
BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55%
BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38%
BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~
BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30%
BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31%
BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~
Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2
Reviewed-on: https://go-review.googlesource.com/9224
Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
|
|
|
Regfree(&adst)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if sys_wbptr == nil {
|
|
|
|
|
sys_wbptr = writebarrierfn("writebarrierptr", Types[Tptr], Types[Tptr])
|
|
|
|
|
}
|
|
|
|
|
Ginscall(sys_wbptr, 0)
|
cmd/internal/gc: inline writeBarrierEnabled check before calling writebarrierptr
I believe the benchmarks that get slower are under register pressure,
and not making the call unconditionally makes the pressure worse,
and the register allocator doesn't do a great job. But part of the point
of this sequence is to get the write barriers out of the way so I can work
on the register allocator, so that's okay.
name old new delta
BenchmarkBinaryTree17 17.9s × (1.00,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.43s × (1.00,1.00) ~
BenchmarkFmtFprintfEmpty 110ns × (1.00,1.06) 114ns × (0.95,1.05) ~
BenchmarkFmtFprintfString 487ns × (0.99,1.00) 468ns × (0.99,1.01) -4.00%
BenchmarkFmtFprintfInt 450ns × (0.99,1.00) 433ns × (1.00,1.01) -3.88%
BenchmarkFmtFprintfIntInt 762ns × (1.00,1.00) 748ns × (0.99,1.01) -1.84%
BenchmarkFmtFprintfPrefixedInt 584ns × (0.99,1.01) 547ns × (0.99,1.01) -6.26%
BenchmarkFmtFprintfFloat 738ns × (1.00,1.00) 756ns × (1.00,1.01) +2.37%
BenchmarkFmtManyArgs 2.80µs × (1.00,1.01) 2.79µs × (1.00,1.01) ~
BenchmarkGobDecode 39.0ms × (0.99,1.00) 39.6ms × (0.99,1.00) +1.54%
BenchmarkGobEncode 37.8ms × (0.98,1.01) 37.6ms × (1.00,1.01) ~
BenchmarkGzip 661ms × (0.99,1.01) 663ms × (0.99,1.02) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 132µs × (0.99,1.01) ~
BenchmarkJSONEncode 56.3ms × (0.99,1.01) 56.2ms × (0.99,1.01) ~
BenchmarkJSONDecode 138ms × (0.99,1.01) 138ms × (1.00,1.00) ~
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.03ms × (1.00,1.01) +0.23%
BenchmarkGoParse 10.2ms × (0.87,1.05) 9.8ms × (0.93,1.10) ~
BenchmarkRegexpMatchEasy0_32 208ns × (1.00,1.00) 207ns × (1.00,1.00) ~
BenchmarkRegexpMatchEasy0_1K 588ns × (1.00,1.00) 581ns × (1.00,1.01) -1.27%
BenchmarkRegexpMatchEasy1_32 182ns × (0.99,1.01) 185ns × (0.99,1.01) +1.65%
BenchmarkRegexpMatchEasy1_1K 986ns × (1.00,1.01) 975ns × (1.00,1.01) -1.17%
BenchmarkRegexpMatchMedium_32 323ns × (1.00,1.01) 328ns × (0.99,1.00) +1.55%
BenchmarkRegexpMatchMedium_1K 89.9µs × (1.00,1.00) 88.6µs × (1.00,1.01) -1.38%
BenchmarkRegexpMatchHard_32 4.72µs × (0.95,1.01) 4.69µs × (0.95,1.03) ~
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 133µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (1.00,1.05) 902ms × (0.99,1.05) ~
BenchmarkTemplate 168ms × (0.99,1.01) 174ms × (0.99,1.01) +3.30%
BenchmarkTimeParse 637ns × (1.00,1.00) 639ns × (1.00,1.00) +0.31%
BenchmarkTimeFormat 738ns × (1.00,1.00) 736ns × (1.00,1.01) ~
Change-Id: I03ce152852edec404538f6c20eb650fac82e2aa2
Reviewed-on: https://go-review.googlesource.com/9224
Reviewed-by: Austin Clements <austin@google.com>
2015-04-24 14:13:06 -04:00
|
|
|
Patch(pjmp, Pc)
|
|
|
|
|
|
|
|
|
|
Regfree(&dst)
|
|
|
|
|
Regfree(&src)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func cgen_wbfat(n, res *Node) {
|
2015-11-02 16:45:07 -05:00
|
|
|
if Curfn != nil {
|
2016-02-26 13:32:28 -08:00
|
|
|
if Curfn.Func.Pragma&Nowritebarrier != 0 {
|
2015-11-02 16:45:07 -05:00
|
|
|
Yyerror("write barrier prohibited")
|
|
|
|
|
}
|
|
|
|
|
if Curfn.Func.WBLineno == 0 {
|
|
|
|
|
Curfn.Func.WBLineno = lineno
|
|
|
|
|
}
|
2015-05-19 15:15:52 -04:00
|
|
|
}
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if Debug_wb > 0 {
|
|
|
|
|
Warn("write barrier")
|
|
|
|
|
}
|
|
|
|
|
needType := true
|
|
|
|
|
funcName := "typedmemmove"
|
|
|
|
|
var dst, src Node
|
|
|
|
|
if n.Ullman >= res.Ullman {
|
|
|
|
|
Agenr(n, &src, nil)
|
|
|
|
|
Agenr(res, &dst, nil)
|
|
|
|
|
} else {
|
|
|
|
|
Agenr(res, &dst, nil)
|
|
|
|
|
Agenr(n, &src, nil)
|
|
|
|
|
}
|
|
|
|
|
p := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &dst, nil)
|
|
|
|
|
a := &p.To
|
|
|
|
|
a.Type = obj.TYPE_MEM
|
|
|
|
|
a.Reg = int16(Thearch.REGSP)
|
2015-10-08 22:13:44 +13:00
|
|
|
a.Offset = Ctxt.FixedFrameSize()
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if needType {
|
|
|
|
|
a.Offset += int64(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
p2 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
|
|
|
|
|
p2.To = p.To
|
|
|
|
|
p2.To.Offset += int64(Widthptr)
|
|
|
|
|
Regfree(&dst)
|
|
|
|
|
if needType {
|
cmd/internal/gc: emit typedmemmove write barrier from sgen
Emitting it here instead of rewriting the tree earlier sets us up
to generate an inline check, like we do for single pointers.
But even without the inline check, generating at this level lets
us generate significantly more efficient code, probably due to
having fewer temporaries and less complex high-level code
for the compiler to churn through.
Revcomp is worse, almost certainly due to register pressure.
name old new delta
BenchmarkBinaryTree17 18.0s × (0.99,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.36s × (1.00,1.00) -1.44%
BenchmarkFmtFprintfEmpty 114ns × (0.95,1.05) 86ns × (0.97,1.06) -24.12%
BenchmarkFmtFprintfString 468ns × (0.99,1.01) 420ns × (0.99,1.02) -10.16%
BenchmarkFmtFprintfInt 433ns × (1.00,1.01) 386ns × (0.99,1.02) -10.74%
BenchmarkFmtFprintfIntInt 748ns × (0.99,1.01) 647ns × (0.99,1.01) -13.56%
BenchmarkFmtFprintfPrefixedInt 547ns × (0.99,1.01) 499ns × (0.99,1.02) -8.78%
BenchmarkFmtFprintfFloat 756ns × (1.00,1.01) 689ns × (1.00,1.00) -8.86%
BenchmarkFmtManyArgs 2.79µs × (1.00,1.01) 2.53µs × (1.00,1.00) -9.30%
BenchmarkGobDecode 39.6ms × (0.99,1.00) 39.2ms × (0.98,1.01) -1.07%
BenchmarkGobEncode 37.6ms × (1.00,1.01) 37.5ms × (0.99,1.01) ~
BenchmarkGzip 663ms × (0.99,1.02) 660ms × (0.98,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 143ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 133µs × (0.99,1.02) ~
BenchmarkJSONEncode 56.2ms × (0.99,1.01) 54.0ms × (0.98,1.01) -3.97%
BenchmarkJSONDecode 138ms × (1.00,1.00) 134ms × (0.99,1.02) -2.70%
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.00ms × (1.00,1.01) ~
BenchmarkGoParse 9.82ms × (0.93,1.10) 10.35ms × (0.88,1.11) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.00) 163ns × (0.99,1.01) -21.26%
BenchmarkRegexpMatchEasy0_1K 581ns × (1.00,1.01) 566ns × (0.99,1.00) -2.50%
BenchmarkRegexpMatchEasy1_32 185ns × (0.99,1.01) 138ns × (1.00,1.01) -25.41%
BenchmarkRegexpMatchEasy1_1K 975ns × (1.00,1.01) 892ns × (1.00,1.00) -8.51%
BenchmarkRegexpMatchMedium_32 328ns × (0.99,1.00) 252ns × (1.00,1.00) -23.17%
BenchmarkRegexpMatchMedium_1K 88.6µs × (1.00,1.01) 73.0µs × (1.00,1.01) -17.66%
BenchmarkRegexpMatchHard_32 4.69µs × (0.95,1.03) 3.85µs × (1.00,1.01) -17.91%
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 117µs × (1.00,1.00) -12.34%
BenchmarkRevcomp 902ms × (0.99,1.05) 1001ms × (0.94,1.01) +11.04%
BenchmarkTemplate 174ms × (0.99,1.01) 160ms × (0.99,1.01) -7.70%
BenchmarkTimeParse 639ns × (1.00,1.00) 622ns × (1.00,1.00) -2.66%
BenchmarkTimeFormat 736ns × (1.00,1.01) 736ns × (1.00,1.02) ~
Change-Id: Ib3bbeb379f5f4819e6f5dcf69bc88a2b7ed41460
Reviewed-on: https://go-review.googlesource.com/9225
Reviewed-by: Austin Clements <austin@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
2015-04-17 11:07:38 -04:00
|
|
|
src.Type = Types[Tptr]
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), typename(n.Type), &src)
|
|
|
|
|
p3 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &src, nil)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
p3.To = p2.To
|
|
|
|
|
p3.To.Offset -= 2 * int64(Widthptr)
|
|
|
|
|
}
|
cmd/internal/gc: emit typedmemmove write barrier from sgen
Emitting it here instead of rewriting the tree earlier sets us up
to generate an inline check, like we do for single pointers.
But even without the inline check, generating at this level lets
us generate significantly more efficient code, probably due to
having fewer temporaries and less complex high-level code
for the compiler to churn through.
Revcomp is worse, almost certainly due to register pressure.
name old new delta
BenchmarkBinaryTree17 18.0s × (0.99,1.01) 18.0s × (0.99,1.01) ~
BenchmarkFannkuch11 4.43s × (1.00,1.00) 4.36s × (1.00,1.00) -1.44%
BenchmarkFmtFprintfEmpty 114ns × (0.95,1.05) 86ns × (0.97,1.06) -24.12%
BenchmarkFmtFprintfString 468ns × (0.99,1.01) 420ns × (0.99,1.02) -10.16%
BenchmarkFmtFprintfInt 433ns × (1.00,1.01) 386ns × (0.99,1.02) -10.74%
BenchmarkFmtFprintfIntInt 748ns × (0.99,1.01) 647ns × (0.99,1.01) -13.56%
BenchmarkFmtFprintfPrefixedInt 547ns × (0.99,1.01) 499ns × (0.99,1.02) -8.78%
BenchmarkFmtFprintfFloat 756ns × (1.00,1.01) 689ns × (1.00,1.00) -8.86%
BenchmarkFmtManyArgs 2.79µs × (1.00,1.01) 2.53µs × (1.00,1.00) -9.30%
BenchmarkGobDecode 39.6ms × (0.99,1.00) 39.2ms × (0.98,1.01) -1.07%
BenchmarkGobEncode 37.6ms × (1.00,1.01) 37.5ms × (0.99,1.01) ~
BenchmarkGzip 663ms × (0.99,1.02) 660ms × (0.98,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 143ms × (1.00,1.00) ~
BenchmarkHTTPClientServer 132µs × (0.99,1.01) 133µs × (0.99,1.02) ~
BenchmarkJSONEncode 56.2ms × (0.99,1.01) 54.0ms × (0.98,1.01) -3.97%
BenchmarkJSONDecode 138ms × (1.00,1.00) 134ms × (0.99,1.02) -2.70%
BenchmarkMandelbrot200 6.03ms × (1.00,1.01) 6.00ms × (1.00,1.01) ~
BenchmarkGoParse 9.82ms × (0.93,1.10) 10.35ms × (0.88,1.11) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.00) 163ns × (0.99,1.01) -21.26%
BenchmarkRegexpMatchEasy0_1K 581ns × (1.00,1.01) 566ns × (0.99,1.00) -2.50%
BenchmarkRegexpMatchEasy1_32 185ns × (0.99,1.01) 138ns × (1.00,1.01) -25.41%
BenchmarkRegexpMatchEasy1_1K 975ns × (1.00,1.01) 892ns × (1.00,1.00) -8.51%
BenchmarkRegexpMatchMedium_32 328ns × (0.99,1.00) 252ns × (1.00,1.00) -23.17%
BenchmarkRegexpMatchMedium_1K 88.6µs × (1.00,1.01) 73.0µs × (1.00,1.01) -17.66%
BenchmarkRegexpMatchHard_32 4.69µs × (0.95,1.03) 3.85µs × (1.00,1.01) -17.91%
BenchmarkRegexpMatchHard_1K 133µs × (1.00,1.01) 117µs × (1.00,1.00) -12.34%
BenchmarkRevcomp 902ms × (0.99,1.05) 1001ms × (0.94,1.01) +11.04%
BenchmarkTemplate 174ms × (0.99,1.01) 160ms × (0.99,1.01) -7.70%
BenchmarkTimeParse 639ns × (1.00,1.00) 622ns × (1.00,1.00) -2.66%
BenchmarkTimeFormat 736ns × (1.00,1.01) 736ns × (1.00,1.02) ~
Change-Id: Ib3bbeb379f5f4819e6f5dcf69bc88a2b7ed41460
Reviewed-on: https://go-review.googlesource.com/9225
Reviewed-by: Austin Clements <austin@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
2015-04-17 11:07:38 -04:00
|
|
|
Regfree(&src)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
Ginscall(writebarrierfn(funcName, Types[Tptr], Types[Tptr]), 0)
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
// cgen_norm moves n1 to res, truncating to expected type if necessary.
|
|
|
|
|
// n1 is a register, and cgen_norm frees it.
|
|
|
|
|
func cgen_norm(n, n1, res *Node) {
|
|
|
|
|
switch Ctxt.Arch.Thechar {
|
|
|
|
|
case '6', '8':
|
|
|
|
|
// We use sized math, so the result is already truncated.
|
|
|
|
|
default:
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OADD, OSUB, OMUL, ODIV, OCOM, OMINUS:
|
|
|
|
|
// TODO(rsc): What about left shift?
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, n.Type), n1, n1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(n1, res)
|
|
|
|
|
Regfree(n1)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func Mgen(n *Node, n1 *Node, rg *Node) {
|
|
|
|
|
n1.Op = OEMPTY
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if n.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
*n1 = *n
|
|
|
|
|
if n1.Op == OREGISTER || n1.Op == OINDREG {
|
2015-04-13 10:28:57 -07:00
|
|
|
reg[n.Reg-int16(Thearch.REGMIN)]++
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Tempname(n1, n.Type)
|
|
|
|
|
Cgen(n, n1)
|
|
|
|
|
if n.Type.Width <= int64(Widthptr) || Isfloat[n.Type.Etype] {
|
|
|
|
|
n2 := *n1
|
|
|
|
|
Regalloc(n1, n.Type, rg)
|
|
|
|
|
Thearch.Gmove(&n2, n1)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func Mfree(n *Node) {
|
|
|
|
|
if n.Op == OREGISTER {
|
|
|
|
|
Regfree(n)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// allocate a register (reusing res if possible) and generate
|
|
|
|
|
// a = n
|
|
|
|
|
// The caller must call Regfree(a).
|
2015-03-18 17:26:36 -04:00
|
|
|
func Cgenr(n *Node, a *Node, res *Node) {
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("cgenr-n", n)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Isfat(n.Type) {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgenr on fat node")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if n.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
Regalloc(a, n.Type, res)
|
|
|
|
|
Thearch.Gmove(n, a)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case ONAME,
|
|
|
|
|
ODOT,
|
|
|
|
|
ODOTPTR,
|
|
|
|
|
OINDEX,
|
|
|
|
|
OCALLFUNC,
|
|
|
|
|
OCALLMETH,
|
|
|
|
|
OCALLINTER:
|
|
|
|
|
var n1 Node
|
|
|
|
|
Igen(n, &n1, res)
|
|
|
|
|
Regalloc(a, Types[Tptr], &n1)
|
|
|
|
|
Thearch.Gmove(&n1, a)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
Regalloc(a, n.Type, res)
|
|
|
|
|
Cgen(n, a)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// Agenr allocates a register (reusing res if possible) and generates
//	a = &n
// The caller must call Regfree(a).
// The generated code checks that the result is not nil.
//
// ODOT, ODOTPTR and the call ops are addressed through Igen followed by
// Agen. OIND loads the pointer operand with Cgenr and nil-checks it. Ops
// other than OINDEX fall through to Agen into a newly allocated register.
// OINDEX is expanded inline by one of three code paths selected by
// Ctxt.Arch.Thechar: one for '5', one for '8', and a general path for every
// other value.
func Agenr(n *Node, a *Node, res *Node) {
	if Debug['g'] != 0 {
		Dump("\nagenr-n", n)
	}

	nl := n.Left
	nr := n.Right

	switch n.Op {
	case ODOT, ODOTPTR, OCALLFUNC, OCALLMETH, OCALLINTER:
		var n1 Node
		Igen(n, &n1, res)
		Regalloc(a, Types[Tptr], &n1)
		Agen(&n1, a)
		Regfree(&n1)

	case OIND:
		Cgenr(n.Left, a, res)
		Cgen_checknil(a)

	case OINDEX:
		if Ctxt.Arch.Thechar == '5' {
			var p2 *obj.Prog // to be patched to panicindex.
			w := uint32(n.Type.Width)
			bounded := Debug['B'] != 0 || n.Bounded
			var n1 Node
			var n3 Node
			// The three arms below differ only in the order in which the
			// base (nl) and the index (nr) are evaluated.
			if nr.Addable {
				var tmp Node
				if !Isconst(nr, CTINT) {
					Tempname(&tmp, Types[TINT32])
				}
				if !Isconst(nl, CTSTR) {
					Agenr(nl, &n3, res)
				}
				if !Isconst(nr, CTINT) {
					p2 = Thearch.Cgenindex(nr, &tmp, bounded)
					Regalloc(&n1, tmp.Type, nil)
					Thearch.Gmove(&tmp, &n1)
				}
			} else if nl.Addable {
				if !Isconst(nr, CTINT) {
					var tmp Node
					Tempname(&tmp, Types[TINT32])
					p2 = Thearch.Cgenindex(nr, &tmp, bounded)
					Regalloc(&n1, tmp.Type, nil)
					Thearch.Gmove(&tmp, &n1)
				}

				if !Isconst(nl, CTSTR) {
					Agenr(nl, &n3, res)
				}
			} else {
				var tmp Node
				Tempname(&tmp, Types[TINT32])
				p2 = Thearch.Cgenindex(nr, &tmp, bounded)
				nr = &tmp
				if !Isconst(nl, CTSTR) {
					Agenr(nl, &n3, res)
				}
				Regalloc(&n1, tmp.Type, nil)
				Thearch.Gins(Thearch.Optoas(OAS, tmp.Type), &tmp, &n1)
			}

			// &a is in &n3 (allocated in res)
			// i is in &n1 (if not constant)
			// w is width

			// constant index
			if Isconst(nr, CTINT) {
				if Isconst(nl, CTSTR) {
					Fatalf("constant string constant index")
				}
				v := uint64(Mpgetfix(nr.Val().U.(*Mpint)))
				var n2 Node
				if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
					if Debug['B'] == 0 && !n.Bounded {
						// Compare the length word (offset Array_nel from the
						// header in n3) against the constant index. p1 is
						// patched to the instruction after the Panicindex
						// call, so it is the branch that skips the panic.
						n1 = n3
						n1.Op = OINDREG
						n1.Type = Types[Tptr]
						n1.Xoffset = int64(Array_nel)
						Nodconst(&n2, Types[TUINT32], int64(v))
						p1 := Thearch.Ginscmp(OGT, Types[TUINT32], &n1, &n2, +1)
						Ginscall(Panicindex, -1)
						Patch(p1, Pc)
					}

					// Replace the header address in n3 with the data pointer
					// stored at offset Array_array.
					n1 = n3
					n1.Op = OINDREG
					n1.Type = Types[Tptr]
					n1.Xoffset = int64(Array_array)
					Thearch.Gmove(&n1, &n3)
				}

				Nodconst(&n2, Types[Tptr], int64(v*uint64(w)))
				Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
				*a = n3
				break
			}

			var n2 Node
			Regalloc(&n2, Types[TINT32], &n1) // i
			Thearch.Gmove(&n1, &n2)
			Regfree(&n1)

			var n4 Node
			if Debug['B'] == 0 && !n.Bounded {
				// check bounds
				if Isconst(nl, CTSTR) {
					Nodconst(&n4, Types[TUINT32], int64(len(nl.Val().U.(string))))
				} else if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
					n1 = n3
					n1.Op = OINDREG
					n1.Type = Types[Tptr]
					n1.Xoffset = int64(Array_nel)
					Regalloc(&n4, Types[TUINT32], nil)
					Thearch.Gmove(&n1, &n4)
				} else {
					Nodconst(&n4, Types[TUINT32], nl.Type.Bound)
				}
				p1 := Thearch.Ginscmp(OLT, Types[TUINT32], &n2, &n4, +1)
				// n4 holds a register only in the slice/string case above.
				if n4.Op == OREGISTER {
					Regfree(&n4)
				}
				// p2 (from Cgenindex) lands on the Panicindex call; p1 skips it.
				if p2 != nil {
					Patch(p2, Pc)
				}
				Ginscall(Panicindex, -1)
				Patch(p1, Pc)
			}

			if Isconst(nl, CTSTR) {
				Regalloc(&n3, Types[Tptr], res)
				p1 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), nil, &n3)
				Datastring(nl.Val().U.(string), &p1.From)
				p1.From.Type = obj.TYPE_ADDR
			} else if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
				n1 = n3
				n1.Op = OINDREG
				n1.Type = Types[Tptr]
				n1.Xoffset = int64(Array_array)
				Thearch.Gmove(&n1, &n3)
			}

			// Scale the index in n2 by the element width and add it to the
			// base in n3.
			if w == 0 {
				// nothing to do
			} else if Thearch.AddIndex != nil && Thearch.AddIndex(&n2, int64(w), &n3) {
				// done by back end
			} else if w == 1 {
				Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
			} else {
				if w&(w-1) == 0 {
					// Power of 2. Use shift.
					Thearch.Ginscon(Thearch.Optoas(OLSH, Types[TUINT32]), int64(log2(uint64(w))), &n2)
				} else {
					// Not a power of 2. Use multiply.
					Regalloc(&n4, Types[TUINT32], nil)
					Nodconst(&n1, Types[TUINT32], int64(w))
					Thearch.Gmove(&n1, &n4)
					Thearch.Gins(Thearch.Optoas(OMUL, Types[TUINT32]), &n4, &n2)
					Regfree(&n4)
				}
				Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
			}
			*a = n3
			Regfree(&n2)
			break
		}
		if Ctxt.Arch.Thechar == '8' {
			var p2 *obj.Prog // to be patched to panicindex.
			w := uint32(n.Type.Width)
			bounded := Debug['B'] != 0 || n.Bounded
			var n3 Node
			var tmp Node
			var n1 Node
			if nr.Addable {
				// Generate &nl first, and move nr into register.
				if !Isconst(nl, CTSTR) {
					Igen(nl, &n3, res)
				}
				if !Isconst(nr, CTINT) {
					p2 = Thearch.Igenindex(nr, &tmp, bounded)
					Regalloc(&n1, tmp.Type, nil)
					Thearch.Gmove(&tmp, &n1)
				}
			} else if nl.Addable {
				// Generate nr first, and move &nl into register.
				if !Isconst(nr, CTINT) {
					p2 = Thearch.Igenindex(nr, &tmp, bounded)
					Regalloc(&n1, tmp.Type, nil)
					Thearch.Gmove(&tmp, &n1)
				}

				if !Isconst(nl, CTSTR) {
					Igen(nl, &n3, res)
				}
			} else {
				p2 = Thearch.Igenindex(nr, &tmp, bounded)
				nr = &tmp
				if !Isconst(nl, CTSTR) {
					Igen(nl, &n3, res)
				}
				Regalloc(&n1, tmp.Type, nil)
				Thearch.Gins(Thearch.Optoas(OAS, tmp.Type), &tmp, &n1)
			}

			// For fixed array we really want the pointer in n3.
			var n2 Node
			if Isfixedarray(nl.Type) {
				Regalloc(&n2, Types[Tptr], &n3)
				Agen(&n3, &n2)
				Regfree(&n3)
				n3 = n2
			}

			// &a[0] is in n3 (allocated in res)
			// i is in n1 (if not constant)
			// len(a) is in nlen (if needed)
			// w is width

			// constant index
			if Isconst(nr, CTINT) {
				if Isconst(nl, CTSTR) {
					Fatalf("constant string constant index") // front end should handle
				}
				v := uint64(Mpgetfix(nr.Val().U.(*Mpint)))
				if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
					if Debug['B'] == 0 && !n.Bounded {
						// nlen is a copy of the memory reference in n3,
						// retargeted at the length word.
						nlen := n3
						nlen.Type = Types[TUINT32]
						nlen.Xoffset += int64(Array_nel)
						Nodconst(&n2, Types[TUINT32], int64(v))
						p1 := Thearch.Ginscmp(OGT, Types[TUINT32], &nlen, &n2, +1)
						Ginscall(Panicindex, -1)
						Patch(p1, Pc)
					}
				}

				// Load base pointer in n2 = n3.
				Regalloc(&n2, Types[Tptr], &n3)

				n3.Type = Types[Tptr]
				n3.Xoffset += int64(Array_array)
				Thearch.Gmove(&n3, &n2)
				Regfree(&n3)
				if v*uint64(w) != 0 {
					Nodconst(&n1, Types[Tptr], int64(v*uint64(w)))
					Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n1, &n2)
				}
				*a = n2
				break
			}

			// i is in register n1, extend to 32 bits.
			t := Types[TUINT32]

			if Issigned[n1.Type.Etype] {
				t = Types[TINT32]
			}

			Regalloc(&n2, t, &n1) // i
			Thearch.Gmove(&n1, &n2)
			Regfree(&n1)

			if Debug['B'] == 0 && !n.Bounded {
				// check bounds
				// This t shadows the outer t: the comparison is always done
				// as TUINT32, whatever signedness was picked for the index.
				t := Types[TUINT32]

				var nlen Node
				if Isconst(nl, CTSTR) {
					Nodconst(&nlen, t, int64(len(nl.Val().U.(string))))
				} else if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
					nlen = n3
					nlen.Type = t
					nlen.Xoffset += int64(Array_nel)
				} else {
					Nodconst(&nlen, t, nl.Type.Bound)
				}

				p1 := Thearch.Ginscmp(OLT, t, &n2, &nlen, +1)
				// p2 (from Igenindex) lands on the Panicindex call; p1 skips it.
				if p2 != nil {
					Patch(p2, Pc)
				}
				Ginscall(Panicindex, -1)
				Patch(p1, Pc)
			}

			if Isconst(nl, CTSTR) {
				Regalloc(&n3, Types[Tptr], res)
				p1 := Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), nil, &n3)
				Datastring(nl.Val().U.(string), &p1.From)
				p1.From.Type = obj.TYPE_ADDR
				Thearch.Gins(Thearch.Optoas(OADD, n3.Type), &n2, &n3)
				goto indexdone1
			}

			// Load base pointer in n3.
			Regalloc(&tmp, Types[Tptr], &n3)

			if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
				n3.Type = Types[Tptr]
				n3.Xoffset += int64(Array_array)
				Thearch.Gmove(&n3, &tmp)
			}

			Regfree(&n3)
			n3 = tmp

			if w == 0 {
				// nothing to do
			} else if Thearch.AddIndex != nil && Thearch.AddIndex(&n2, int64(w), &n3) {
				// done by back end
			} else if w == 1 {
				Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
			} else {
				if w&(w-1) == 0 {
					// Power of 2. Use shift.
					Thearch.Ginscon(Thearch.Optoas(OLSH, Types[TUINT32]), int64(log2(uint64(w))), &n2)
				} else {
					// Not a power of 2. Use multiply.
					Thearch.Ginscon(Thearch.Optoas(OMUL, Types[TUINT32]), int64(w), &n2)
				}
				Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
			}

		indexdone1:
			*a = n3
			Regfree(&n2)
			break
		}

		// General path, used when Thechar is neither '5' nor '8'.
		// freelen is set to 1 once nlen has been filled in by Igen, so that
		// it is released with Regfree at indexdone.
		freelen := 0
		w := uint64(n.Type.Width)

		// Generate the non-addressable child first.
		var n3 Node
		var nlen Node
		var tmp Node
		var n1 Node
		if nr.Addable {
			goto irad
		}
		if nl.Addable {
			Cgenr(nr, &n1, nil)
			if !Isconst(nl, CTSTR) {
				if Isfixedarray(nl.Type) {
					Agenr(nl, &n3, res)
				} else {
					// Load the data pointer into n3, then retarget nlen at
					// the length word of the same header.
					Igen(nl, &nlen, res)
					freelen = 1
					nlen.Type = Types[Tptr]
					nlen.Xoffset += int64(Array_array)
					Regalloc(&n3, Types[Tptr], res)
					Thearch.Gmove(&nlen, &n3)
					nlen.Type = Types[Simtype[TUINT]]
					nlen.Xoffset += int64(Array_nel) - int64(Array_array)
				}
			}

			goto index
		}

		// Neither child is addressable: evaluate the index into a temporary
		// and continue as if it had been addressable.
		Tempname(&tmp, nr.Type)
		Cgen(nr, &tmp)
		nr = &tmp

	irad:
		if !Isconst(nl, CTSTR) {
			if Isfixedarray(nl.Type) {
				Agenr(nl, &n3, res)
			} else {
				if !nl.Addable {
					if res != nil && res.Op == OREGISTER { // give up res, which we don't need yet.
						Regfree(res)
					}

					// igen will need an addressable node.
					var tmp2 Node
					Tempname(&tmp2, nl.Type)
					Cgen(nl, &tmp2)
					nl = &tmp2

					if res != nil && res.Op == OREGISTER { // reacquire res
						Regrealloc(res)
					}
				}

				Igen(nl, &nlen, res)
				freelen = 1
				nlen.Type = Types[Tptr]
				nlen.Xoffset += int64(Array_array)
				Regalloc(&n3, Types[Tptr], res)
				Thearch.Gmove(&nlen, &n3)
				nlen.Type = Types[Simtype[TUINT]]
				nlen.Xoffset += int64(Array_nel) - int64(Array_array)
			}
		}

		if !Isconst(nr, CTINT) {
			Cgenr(nr, &n1, nil)
		}

		goto index

		// &a is in &n3 (allocated in res)
		// i is in &n1 (if not constant)
		// len(a) is in nlen (if needed)
		// w is width

		// constant index
	index:
		if Isconst(nr, CTINT) {
			if Isconst(nl, CTSTR) {
				Fatalf("constant string constant index") // front end should handle
			}
			v := uint64(Mpgetfix(nr.Val().U.(*Mpint)))
			if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
				if Debug['B'] == 0 && !n.Bounded {
					p1 := Thearch.Ginscmp(OGT, Types[Simtype[TUINT]], &nlen, Nodintconst(int64(v)), +1)
					Ginscall(Panicindex, -1)
					Patch(p1, Pc)
				}

				// nlen is released here because this path leaves through
				// break and never reaches indexdone.
				Regfree(&nlen)
			}

			if v*w != 0 {
				Thearch.Ginscon(Thearch.Optoas(OADD, Types[Tptr]), int64(v*w), &n3)
			}
			*a = n3
			break
		}

		// type of the index
		t := Types[TUINT64]

		if Issigned[n1.Type.Etype] {
			t = Types[TINT64]
		}

		var n2 Node
		Regalloc(&n2, t, &n1) // i
		Thearch.Gmove(&n1, &n2)
		Regfree(&n1)

		if Debug['B'] == 0 && !n.Bounded {
			// check bounds
			// Note that t is reassigned (not shadowed) here, so the scaling
			// code below also sees the unsigned type chosen for the check.
			t = Types[Simtype[TUINT]]

			if Is64(nr.Type) {
				t = Types[TUINT64]
			}
			if Isconst(nl, CTSTR) {
				Nodconst(&nlen, t, int64(len(nl.Val().U.(string))))
			} else if Isslice(nl.Type) || nl.Type.Etype == TSTRING {
				// nlen already initialized
			} else {
				Nodconst(&nlen, t, nl.Type.Bound)
			}

			p1 := Thearch.Ginscmp(OLT, t, &n2, &nlen, +1)
			Ginscall(Panicindex, -1)
			Patch(p1, Pc)
		}

		if Isconst(nl, CTSTR) {
			Regalloc(&n3, Types[Tptr], res)
			p1 := Thearch.Gins(Thearch.Optoas(OAS, n3.Type), nil, &n3) // XXX was LEAQ!
			Datastring(nl.Val().U.(string), &p1.From)
			p1.From.Type = obj.TYPE_ADDR
			Thearch.Gins(Thearch.Optoas(OADD, n3.Type), &n2, &n3)
			goto indexdone
		}

		if w == 0 {
			// nothing to do
		} else if Thearch.AddIndex != nil && Thearch.AddIndex(&n2, int64(w), &n3) {
			// done by back end
		} else if w == 1 {
			Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
		} else {
			if w&(w-1) == 0 {
				// Power of 2. Use shift.
				Thearch.Ginscon(Thearch.Optoas(OLSH, t), int64(log2(w)), &n2)
			} else {
				// Not a power of 2. Use multiply.
				Thearch.Ginscon(Thearch.Optoas(OMUL, t), int64(w), &n2)
			}
			Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n3)
		}

	indexdone:
		*a = n3
		Regfree(&n2)
		if freelen != 0 {
			Regfree(&nlen)
		}

	default:
		Regalloc(a, Types[Tptr], res)
		Agen(n, a)
	}
}
|
|
|
|
|
|
2015-04-30 10:41:57 -07:00
|
|
|
// log2 returns the logarithm base 2 of n. n must be a power of 2.
//
// For an n that is not a power of 2 the result is the position of its
// highest set bit, i.e. floor(log2(n)). Zero has no logarithm; log2 returns 0
// for it so that a bad argument cannot make the loop spin forever.
func log2(n uint64) int {
	x := 0
	// Halve n until only the top bit is left. Testing n > 1 (rather than
	// comparing a shifted copy against 1) guarantees termination for n == 0.
	for n > 1 {
		n >>= 1
		x++
	}
	return x
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// Agen generates:
//	res = &n;
// The generated code checks that the result is not nil.
//
// Agen returns without emitting anything when n or n.Type is nil, and it
// looks through any chain of OCONVNOP wrappers before examining n. Three
// shapes are handled up front (an oversized nil constant, an OINDREG with
// zero offset, and any addressable node); everything else is dispatched on
// n.Op. An op that is not listed in the switch is a fatal error.
func Agen(n *Node, res *Node) {
	if Debug['g'] != 0 {
		Dump("\nagen-res", res)
		Dump("agen-r", n)
	}

	if n == nil || n.Type == nil {
		return
	}

	for n.Op == OCONVNOP {
		n = n.Left
	}

	if Isconst(n, CTNIL) && n.Type.Width > int64(Widthptr) {
		// Use of a nil interface or nil slice.
		// Create a temporary we can take the address of and read.
		// The generated code is just going to panic, so it need not
		// be terribly efficient. See issue 3670.
		var n1 Node
		Tempname(&n1, n.Type)

		Gvardef(&n1)
		Thearch.Clearfat(&n1)
		var n2 Node
		Regalloc(&n2, Types[Tptr], res)
		var n3 Node
		n3.Op = OADDR
		n3.Left = &n1
		Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &n3, &n2)
		Thearch.Gmove(&n2, res)
		Regfree(&n2)
		return
	}

	if n.Op == OINDREG && n.Xoffset == 0 {
		// Generate MOVW R0, R1 instead of MOVW $0(R0), R1.
		// This allows better move propagation in the back ends
		// (and maybe it helps the processor).
		n1 := *n
		n1.Op = OREGISTER
		n1.Type = res.Type
		Thearch.Gmove(&n1, res)
		return
	}

	if n.Addable {
		// A register has no address, so an addressable OREGISTER here is a
		// compiler bug.
		if n.Op == OREGISTER {
			Fatalf("agen OREGISTER")
		}
		// Build an OADDR node around n and assign it through a scratch
		// register into res.
		var n1 Node
		n1.Op = OADDR
		n1.Left = n
		var n2 Node
		Regalloc(&n2, Types[Tptr], res)
		Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &n1, &n2)
		Thearch.Gmove(&n2, res)
		Regfree(&n2)
		return
	}

	nl := n.Left

	switch n.Op {
	default:
		Fatalf("agen: unknown op %v", Nconv(n, obj.FmtShort|obj.FmtSign))

	// For the three call forms: emit the call, then let cgen_aret put the
	// address of its result in res.
	case OCALLMETH:
		cgen_callmeth(n, 0)
		cgen_aret(n, res)

	case OCALLINTER:
		cgen_callinter(n, res, 0)
		cgen_aret(n, res)

	case OCALLFUNC:
		cgen_call(n, 0)
		cgen_aret(n, res)

	case OEFACE, ODOTTYPE, OSLICE, OSLICEARR, OSLICESTR, OSLICE3, OSLICE3ARR:
		// Materialize the value in a temporary, then take the temporary's
		// address.
		var n1 Node
		Tempname(&n1, n.Type)
		Cgen(n, &n1)
		Agen(&n1, res)

	case OINDEX:
		// Agenr leaves the element address in a register; copy it to res
		// and release the register.
		var n1 Node
		Agenr(n, &n1, res)
		Thearch.Gmove(&n1, res)
		Regfree(&n1)

	case ONAME:
		// should only get here with names in this func.
		if n.Name.Funcdepth > 0 && n.Name.Funcdepth != Funcdepth {
			Dump("bad agen", n)
			Fatalf("agen: bad ONAME funcdepth %d != %d", n.Name.Funcdepth, Funcdepth)
		}

		// should only get here for heap vars or paramref
		if n.Class&PHEAP == 0 && n.Class != PPARAMREF {
			Dump("bad agen", n)
			Fatalf("agen: bad ONAME class %#x", n.Class)
		}

		// The address is the value of the name's Heapaddr node plus the
		// name's own offset.
		Cgen(n.Name.Heapaddr, res)
		if n.Xoffset != 0 {
			addOffset(res, n.Xoffset)
		}

	case OIND:
		// &*p is p itself, after a nil check.
		Cgen(nl, res)
		Cgen_checknil(res)

	case ODOT:
		// Address of the enclosing value plus the field offset.
		Agen(nl, res)
		if n.Xoffset != 0 {
			addOffset(res, n.Xoffset)
		}

	case ODOTPTR:
		// Value of the pointer, nil-checked, plus the field offset.
		Cgen(nl, res)
		Cgen_checknil(res)
		if n.Xoffset != 0 {
			addOffset(res, n.Xoffset)
		}
	}
}
|
|
|
|
|
|
|
|
|
|
// addOffset emits code that adds the constant offset to the pointer-typed
// value in res, leaving the sum in res.
func addOffset(res *Node, offset int64) {
|
|
|
|
|
// When Thechar is '6' or '8', a single add of the constant into res is emitted.
// NOTE(review): presumably these are the amd64 and 386 back ends — confirm.
if Ctxt.Arch.Thechar == '6' || Ctxt.Arch.Thechar == '8' {
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), Nodintconst(offset), res)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Otherwise work through two scratch registers: n1 receives a copy of res,
// n2 is loaded with the constant, n2 is added into n1, and n1 is moved back
// to res. Both registers are released before returning.
var n1, n2 Node
|
|
|
|
|
Regalloc(&n1, Types[Tptr], nil)
|
|
|
|
|
Thearch.Gmove(res, &n1)
|
|
|
|
|
Regalloc(&n2, Types[Tptr], nil)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), Nodintconst(offset), &n2)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &n2, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
|
2015-03-20 00:06:10 -04:00
|
|
|
// Igen computes the address &n, stores it in a register r,
|
|
|
|
|
// and rewrites a to refer to *r. The chosen r may be the
|
|
|
|
|
// stack pointer, it may be borrowed from res, or it may
|
|
|
|
|
// be a newly allocated register. The caller must call Regfree(a)
|
|
|
|
|
// to free r when the address is no longer needed.
|
|
|
|
|
// The generated code ensures that &n is not nil.
|
2015-03-18 17:26:36 -04:00
|
|
|
// n is the expression to address and a is overwritten with the resulting
// addressable node. res is forwarded to the recursive Igen calls, Cgenr,
// Regalloc and Agenr.
// NOTE(review): res looks like a register hint for those helpers — confirm.
func Igen(n *Node, a *Node, res *Node) {
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("\nigen-n", n)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case ONAME:
|
|
|
|
|
// Names with PHEAP set, or of class PPARAMREF, leave the switch and are
// handled by the Agenr path at the bottom. Any other name is copied into
// a unchanged.
if (n.Class&PHEAP != 0) || n.Class == PPARAMREF {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
*a = *n
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
case OINDREG:
|
|
|
|
|
// Increase the refcount of the register so that igen's caller
|
|
|
|
|
// has to call Regfree.
|
2015-04-13 10:28:57 -07:00
|
|
|
// REGSP is excluded from the refcount bump.
if n.Reg != int16(Thearch.REGSP) {
|
|
|
|
|
reg[n.Reg-int16(Thearch.REGMIN)]++
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
*a = *n
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
case ODOT:
|
|
|
|
|
// Address the left operand first, then fold this node's offset and type
// into a.
Igen(n.Left, a, res)
|
|
|
|
|
a.Xoffset += n.Xoffset
|
|
|
|
|
a.Type = n.Type
|
|
|
|
|
Fixlargeoffset(a)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
case ODOTPTR:
|
|
|
|
|
// Evaluate the left operand with Cgenr, nil-check it, then turn a into
// an OINDREG reference carrying this node's offset and type.
Cgenr(n.Left, a, res)
|
|
|
|
|
Cgen_checknil(a)
|
|
|
|
|
a.Op = OINDREG
|
|
|
|
|
a.Xoffset += n.Xoffset
|
|
|
|
|
a.Type = n.Type
|
|
|
|
|
Fixlargeoffset(a)
|
|
|
|
|
return
|
|
|
|
|
|
|
2015-04-01 09:38:44 -07:00
|
|
|
case OCALLFUNC, OCALLMETH, OCALLINTER:
|
2015-03-18 17:26:36 -04:00
|
|
|
// Generate the call with the helper that matches the call kind.
switch n.Op {
|
|
|
|
|
case OCALLFUNC:
|
|
|
|
|
cgen_call(n, 0)
|
|
|
|
|
|
|
|
|
|
|
case OCALLMETH:
|
|
|
|
|
cgen_callmeth(n, 0)
|
|
|
|
|
|
|
|
|
|
|
case OCALLINTER:
|
|
|
|
|
cgen_callinter(n, nil, 0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Rebuild a from scratch as an OINDREG reference off REGSP, at offset
// fp.Width + Ctxt.FixedFrameSize().
// NOTE(review): fp is presumably the first result parameter of the callee — confirm.
var flist Iter
|
|
|
|
|
fp := Structfirst(&flist, Getoutarg(n.Left.Type))
|
|
|
|
|
*a = Node{}
|
|
|
|
|
a.Op = OINDREG
|
2015-04-13 10:28:57 -07:00
|
|
|
a.Reg = int16(Thearch.REGSP)
|
2015-04-02 19:58:37 -07:00
|
|
|
a.Addable = true
|
2015-10-08 22:13:44 +13:00
|
|
|
a.Xoffset = fp.Width + Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
a.Type = n.Type
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
// Index of fixed-size array by constant can
|
|
|
|
|
// put the offset in the addressing.
|
|
|
|
|
// Could do the same for slice except that we need
|
|
|
|
|
// to use the real index for the bounds checking.
|
|
|
|
|
case OINDEX:
|
|
|
|
|
if Isfixedarray(n.Left.Type) || (Isptr[n.Left.Type.Etype] && Isfixedarray(n.Left.Left.Type)) {
|
|
|
|
|
if Isconst(n.Right, CTINT) {
|
|
|
|
|
// Compute &a.
|
|
|
|
|
if !Isptr[n.Left.Type.Etype] {
|
|
|
|
|
Igen(n.Left, a, res)
|
|
|
|
|
} else {
|
|
|
|
|
// Pointer operand: address it into n1, nil-check it, move it into a
// newly allocated register a, and make a an OINDREG reference.
var n1 Node
|
|
|
|
|
Igen(n.Left, &n1, res)
|
|
|
|
|
Cgen_checknil(&n1)
|
|
|
|
|
Regalloc(a, Types[Tptr], res)
|
|
|
|
|
Thearch.Gmove(&n1, a)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
a.Op = OINDREG
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Compute &a[i] as &a + i*width.
|
|
|
|
|
a.Type = n.Type
|
|
|
|
|
|
|
2015-05-27 00:47:05 -04:00
|
|
|
a.Xoffset += Mpgetfix(n.Right.Val().U.(*Mpint)) * n.Type.Width
|
2015-03-18 17:26:36 -04:00
|
|
|
Fixlargeoffset(a)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
// Fallback for every case that did not return above: Agenr fills in a, which
// is then retargeted as an OINDREG reference of n's type.
Agenr(n, a, res)
|
|
|
|
|
a.Op = OINDREG
|
|
|
|
|
a.Type = n.Type
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// Bgen generates code for branches:
|
|
|
|
|
//
|
|
|
|
|
// if n == wantTrue {
|
|
|
|
|
// goto to
|
|
|
|
|
// }
|
|
|
|
|
// Bgen forwards to bgenx with a nil res, which selects branch generation
// rather than boolean value generation.
func Bgen(n *Node, wantTrue bool, likely int, to *obj.Prog) {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenx(n, nil, wantTrue, likely, to)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Bvgen generates code for calculating boolean values:
|
|
|
|
|
// res = n == wantTrue
|
|
|
|
|
// Bvgen picks between two strategies depending on whether the architecture
// provides Thearch.Ginsboolval.
func Bvgen(n, res *Node, wantTrue bool) {
|
|
|
|
|
if Thearch.Ginsboolval == nil {
|
|
|
|
|
// Direct value generation not implemented for this architecture.
|
|
|
|
|
// Implement using jumps.
|
|
|
|
|
// geninit is true, so bvgenjump leaves n.Ninit in place to be generated.
bvgenjump(n, res, wantTrue, true)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
// A non-nil res makes bgenx produce a value; no likeliness hint or branch
// target is supplied.
bgenx(n, res, wantTrue, 0, nil)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bvgenjump implements boolean value generation using jumps:
|
|
|
|
|
// if n == wantTrue {
|
|
|
|
|
// res = 1
|
|
|
|
|
// } else {
|
|
|
|
|
// res = 0
|
|
|
|
|
// }
|
|
|
|
|
// geninit controls whether n's Ninit is generated.
|
|
|
|
|
// Emitted layout, in program order:
//
//	      JMP test                        (p1)
//	set:  res = true                      (p2)
//	      JMP done                        (p3)
//	test: branch to set if n == wantTrue
//	      res = false
//	done:
func bvgenjump(n, res *Node, wantTrue, geninit bool) {
|
|
|
|
|
// Save n.Ninit so it can be restored on exit. When geninit is false it is
// cleared for the duration of the Bgen call below.
init := n.Ninit
|
|
|
|
|
if !geninit {
|
2016-03-03 15:08:25 -08:00
|
|
|
setNodeSeq(&n.Ninit, nil)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
|
|
|
|
// p1 jumps over the "set true" sequence to the test emitted further down.
p1 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
// p2 records the address of the "set true" sequence; Bgen branches here.
p2 := Pc
|
|
|
|
|
Thearch.Gmove(Nodbool(true), res)
|
|
|
|
|
// p3 jumps from the "set true" sequence to the end.
p3 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p1, Pc)
|
|
|
|
|
Bgen(n, wantTrue, 0, p2)
|
|
|
|
|
// Fallthrough from the test: the condition did not hold, so store false.
Thearch.Gmove(Nodbool(false), res)
|
|
|
|
|
Patch(p3, Pc)
|
2016-03-03 15:08:25 -08:00
|
|
|
// Restore the saved Ninit; when geninit was true this stores the same value back.
setNodeSeq(&n.Ninit, init)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bgenx is the backend for Bgen and Bvgen.
|
|
|
|
|
// If res is nil, it generates a branch.
|
|
|
|
|
// Otherwise, it generates a boolean value.
|
|
|
|
|
func bgenx(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
|
2015-03-18 17:26:36 -04:00
|
|
|
if Debug['g'] != 0 {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
fmt.Printf("\nbgenx wantTrue=%t likely=%d to=%v\n", wantTrue, likely, to)
|
|
|
|
|
Dump("n", n)
|
|
|
|
|
Dump("res", res)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
genval := res != nil
|
|
|
|
|
|
2015-03-18 17:26:36 -04:00
|
|
|
if n == nil {
|
|
|
|
|
n = Nodbool(true)
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
Genlist(n.Ninit)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
if n.Type == nil {
|
|
|
|
|
Convlit(&n, Types[TBOOL])
|
|
|
|
|
if n.Type == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
if n.Type.Etype != TBOOL {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("bgen: bad type %v for %v", n.Type, Oconv(int(n.Op), 0))
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for n.Op == OCONVNOP {
|
|
|
|
|
n = n.Left
|
2015-04-06 19:36:36 -07:00
|
|
|
Genlist(n.Ninit)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if Thearch.Bgen_float != nil && n.Left != nil && Isfloat[n.Left.Type.Etype] {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
bvgenjump(n, res, wantTrue, false)
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
Thearch.Bgen_float(n, wantTrue, likely, to)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
Cgen(n, res)
|
|
|
|
|
if !wantTrue {
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var tmp Node
|
|
|
|
|
Regalloc(&tmp, n.Type, nil)
|
|
|
|
|
Cgen(n, &tmp)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenNonZero(&tmp, nil, wantTrue, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
Regfree(&tmp)
|
|
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
case ONAME:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
// 0g, 5g, 7g, and 9g might need a temporary or other help here,
|
|
|
|
|
// but they don't support direct generation of a bool value yet.
|
|
|
|
|
// We can fix that as we go.
|
|
|
|
|
switch Ctxt.Arch.Thechar {
|
2015-09-10 11:33:09 -04:00
|
|
|
case '0', '5', '7', '9':
|
|
|
|
|
Fatalf("genval 0g, 5g, 7g, 9g ONAMES not fully implemented")
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
|
|
|
|
Cgen(n, res)
|
|
|
|
|
if !wantTrue {
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-09-10 11:33:09 -04:00
|
|
|
if n.Addable && Ctxt.Arch.Thechar != '0' && Ctxt.Arch.Thechar != '5' && Ctxt.Arch.Thechar != '7' && Ctxt.Arch.Thechar != '9' {
|
2015-04-06 19:36:36 -07:00
|
|
|
// no need for a temporary
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenNonZero(n, nil, wantTrue, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
var tmp Node
|
|
|
|
|
Regalloc(&tmp, n.Type, nil)
|
|
|
|
|
Cgen(n, &tmp)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenNonZero(&tmp, nil, wantTrue, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
Regfree(&tmp)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
case OLITERAL:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
// n is a constant.
|
2015-04-06 19:36:36 -07:00
|
|
|
if !Isconst(n, CTBOOL) {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("bgen: non-bool const %v\n", Nconv(n, obj.FmtLong))
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
2015-05-27 00:47:05 -04:00
|
|
|
Cgen(Nodbool(wantTrue == n.Val().U.(bool)), res)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
// If n == wantTrue, jump; otherwise do nothing.
|
2015-05-27 00:47:05 -04:00
|
|
|
if wantTrue == n.Val().U.(bool) {
|
2015-04-06 19:36:36 -07:00
|
|
|
Patch(Gbranch(obj.AJMP, nil, likely), to)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OANDAND, OOROR:
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
and := (n.Op == OANDAND) == wantTrue
|
|
|
|
|
if genval {
|
|
|
|
|
p1 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
p2 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p2, Pc)
|
|
|
|
|
Cgen(Nodbool(!and), res)
|
|
|
|
|
p3 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p1, Pc)
|
|
|
|
|
Bgen(n.Left, wantTrue != and, 0, p2)
|
|
|
|
|
Bvgen(n.Right, res, wantTrue)
|
|
|
|
|
Patch(p3, Pc)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if and {
|
2015-03-18 17:26:36 -04:00
|
|
|
p1 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
p2 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p1, Pc)
|
2015-04-06 19:36:36 -07:00
|
|
|
Bgen(n.Left, !wantTrue, -likely, p2)
|
|
|
|
|
Bgen(n.Right, !wantTrue, -likely, p2)
|
2015-03-18 17:26:36 -04:00
|
|
|
p1 = Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p1, to)
|
|
|
|
|
Patch(p2, Pc)
|
|
|
|
|
} else {
|
2015-04-06 19:36:36 -07:00
|
|
|
Bgen(n.Left, wantTrue, likely, to)
|
|
|
|
|
Bgen(n.Right, wantTrue, likely, to)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case ONOT: // unary
|
2015-04-06 19:36:36 -07:00
|
|
|
if n.Left == nil || n.Left.Type == nil {
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenx(n.Left, res, !wantTrue, likely, to)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case OEQ, ONE, OLT, OGT, OLE, OGE:
|
2015-04-06 19:36:36 -07:00
|
|
|
if n.Left == nil || n.Left.Type == nil || n.Right == nil || n.Right.Type == nil {
|
|
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// n.Op is one of OEQ, ONE, OLT, OGT, OLE, OGE
|
|
|
|
|
nl := n.Left
|
|
|
|
|
nr := n.Right
|
2015-09-24 23:21:18 +02:00
|
|
|
op := n.Op
|
2015-04-06 19:36:36 -07:00
|
|
|
|
|
|
|
|
if !wantTrue {
|
|
|
|
|
if Isfloat[nr.Type.Etype] {
|
|
|
|
|
// Brcom is not valid on floats when NaN is involved.
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
ll := n.Ninit // avoid re-genning Ninit
|
2016-03-03 15:08:25 -08:00
|
|
|
setNodeSeq(&n.Ninit, nil)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
bgenx(n, res, true, likely, to)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OXOR, Types[TUINT8]), Nodintconst(1), res) // res = !res
|
2016-03-03 15:08:25 -08:00
|
|
|
setNodeSeq(&n.Ninit, ll)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
return
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
p1 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
p2 := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p1, Pc)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
bgenx(n, res, true, -likely, p2)
|
2015-04-06 19:36:36 -07:00
|
|
|
Patch(Gbranch(obj.AJMP, nil, 0), to)
|
|
|
|
|
Patch(p2, Pc)
|
2016-03-03 15:08:25 -08:00
|
|
|
setNodeSeq(&n.Ninit, ll)
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-09-24 23:21:18 +02:00
|
|
|
op = Brcom(op)
|
2015-04-06 19:36:36 -07:00
|
|
|
}
|
|
|
|
|
wantTrue = true
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// make simplest on right
|
|
|
|
|
if nl.Op == OLITERAL || (nl.Ullman < nr.Ullman && nl.Ullman < UINF) {
|
2015-09-24 23:21:18 +02:00
|
|
|
op = Brrev(op)
|
2015-04-06 19:36:36 -07:00
|
|
|
nl, nr = nr, nl
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
if Isslice(nl.Type) || Isinter(nl.Type) {
|
|
|
|
|
// front end should only leave cmp to literal nil
|
2015-09-24 23:21:18 +02:00
|
|
|
if (op != OEQ && op != ONE) || nr.Op != OLITERAL {
|
2015-04-06 19:36:36 -07:00
|
|
|
if Isslice(nl.Type) {
|
|
|
|
|
Yyerror("illegal slice comparison")
|
|
|
|
|
} else {
|
2015-03-18 17:26:36 -04:00
|
|
|
Yyerror("illegal interface comparison")
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var ptr Node
|
|
|
|
|
Igen(nl, &ptr, nil)
|
|
|
|
|
if Isslice(nl.Type) {
|
|
|
|
|
ptr.Xoffset += int64(Array_array)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
ptr.Type = Types[Tptr]
|
|
|
|
|
var tmp Node
|
|
|
|
|
Regalloc(&tmp, ptr.Type, &ptr)
|
|
|
|
|
Cgen(&ptr, &tmp)
|
|
|
|
|
Regfree(&ptr)
|
2015-09-24 23:21:18 +02:00
|
|
|
bgenNonZero(&tmp, res, op == OEQ != wantTrue, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
Regfree(&tmp)
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
if Iscomplex[nl.Type.Etype] {
|
2015-09-24 23:21:18 +02:00
|
|
|
complexbool(op, nl, nr, res, wantTrue, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
if Ctxt.Arch.Regsize == 4 && Is64(nr.Type) {
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
// TODO: Teach Cmp64 to generate boolean values and remove this.
|
|
|
|
|
bvgenjump(n, res, wantTrue, false)
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
if !nl.Addable || Isconst(nl, CTINT) {
|
|
|
|
|
nl = CgenTemp(nl)
|
|
|
|
|
}
|
|
|
|
|
if !nr.Addable {
|
|
|
|
|
nr = CgenTemp(nr)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-09-24 23:21:18 +02:00
|
|
|
Thearch.Cmp64(nl, nr, op, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
if nr.Ullman >= UINF {
|
2015-03-18 17:26:36 -04:00
|
|
|
var n1 Node
|
2015-04-06 19:36:36 -07:00
|
|
|
Regalloc(&n1, nl.Type, nil)
|
|
|
|
|
Cgen(nl, &n1)
|
2015-06-08 16:21:50 -07:00
|
|
|
nl = &n1
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var tmp Node
|
|
|
|
|
Tempname(&tmp, nl.Type)
|
|
|
|
|
Thearch.Gmove(&n1, &tmp)
|
|
|
|
|
Regfree(&n1)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, nr.Type, nil)
|
|
|
|
|
Cgen(nr, &n2)
|
2015-06-08 16:21:50 -07:00
|
|
|
nr = &n2
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
Regalloc(&n1, nl.Type, nil)
|
|
|
|
|
Cgen(&tmp, &n1)
|
|
|
|
|
Regfree(&n1)
|
2015-08-20 18:56:18 +02:00
|
|
|
Regfree(&n2)
|
2015-04-06 19:36:36 -07:00
|
|
|
} else {
|
|
|
|
|
var n1 Node
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nl.Addable && Ctxt.Arch.Thechar == '8' {
|
2015-03-18 17:26:36 -04:00
|
|
|
Tempname(&n1, nl.Type)
|
|
|
|
|
} else {
|
|
|
|
|
Regalloc(&n1, nl.Type, nil)
|
2015-04-06 19:36:36 -07:00
|
|
|
defer Regfree(&n1)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
nl = &n1
|
|
|
|
|
|
2015-09-10 11:33:09 -04:00
|
|
|
if Smallintconst(nr) && Ctxt.Arch.Thechar != '0' && Ctxt.Arch.Thechar != '9' {
|
2015-03-18 17:26:36 -04:00
|
|
|
Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), nl, nr)
|
2015-09-24 23:21:18 +02:00
|
|
|
bins(nr.Type, res, op, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !nr.Addable && Ctxt.Arch.Thechar == '8' {
|
2015-04-06 19:36:36 -07:00
|
|
|
nr = CgenTemp(nr)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var n2 Node
|
2015-03-18 17:26:36 -04:00
|
|
|
Regalloc(&n2, nr.Type, nil)
|
|
|
|
|
Cgen(nr, &n2)
|
|
|
|
|
nr = &n2
|
2015-04-06 19:36:36 -07:00
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
l, r := nl, nr
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// On x86, only < and <= work right with NaN; reverse if needed
|
2015-09-24 23:21:18 +02:00
|
|
|
if Ctxt.Arch.Thechar == '6' && Isfloat[nl.Type.Etype] && (op == OGT || op == OGE) {
|
2015-04-06 19:36:36 -07:00
|
|
|
l, r = r, l
|
2015-09-24 23:21:18 +02:00
|
|
|
op = Brrev(op)
|
2015-04-06 19:36:36 -07:00
|
|
|
}
|
|
|
|
|
|
2015-09-10 11:33:09 -04:00
|
|
|
// MIPS does not have a CMP instruction
|
|
|
|
|
if Ctxt.Arch.Thechar == '0' {
|
|
|
|
|
p := Thearch.Ginscmp(op, nr.Type, l, r, likely)
|
|
|
|
|
Patch(p, to)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// Do the comparison.
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OCMP, nr.Type), l, r)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
// Handle floating point special cases.
|
|
|
|
|
// Note that 8g has Bgen_float and is handled above.
|
|
|
|
|
if Isfloat[nl.Type.Etype] {
|
|
|
|
|
switch Ctxt.Arch.Thechar {
|
|
|
|
|
case '5':
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("genval 5g Isfloat special cases not implemented")
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
switch n.Op {
|
|
|
|
|
case ONE:
|
|
|
|
|
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, likely), to)
|
2015-09-24 23:21:18 +02:00
|
|
|
Patch(Gbranch(Thearch.Optoas(op, nr.Type), nr.Type, likely), to)
|
2015-04-06 19:36:36 -07:00
|
|
|
default:
|
|
|
|
|
p := Gbranch(Thearch.Optoas(OPS, nr.Type), nr.Type, -likely)
|
2015-09-24 23:21:18 +02:00
|
|
|
Patch(Gbranch(Thearch.Optoas(op, nr.Type), nr.Type, likely), to)
|
2015-04-06 19:36:36 -07:00
|
|
|
Patch(p, Pc)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
case '6':
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OEQ:
|
2015-03-18 17:26:36 -04:00
|
|
|
// neither NE nor P
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
var reg Node
|
|
|
|
|
Regalloc(&reg, Types[TBOOL], nil)
|
|
|
|
|
Thearch.Ginsboolval(Thearch.Optoas(OEQ, nr.Type), &reg)
|
|
|
|
|
Thearch.Ginsboolval(Thearch.Optoas(OPC, nr.Type), res)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAND, Types[TBOOL]), &reg, res)
|
|
|
|
|
Regfree(&reg)
|
|
|
|
|
} else {
|
|
|
|
|
p1 := Gbranch(Thearch.Optoas(ONE, nr.Type), nil, -likely)
|
|
|
|
|
p2 := Gbranch(Thearch.Optoas(OPS, nr.Type), nil, -likely)
|
|
|
|
|
Patch(Gbranch(obj.AJMP, nil, 0), to)
|
|
|
|
|
Patch(p1, Pc)
|
|
|
|
|
Patch(p2, Pc)
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
|
|
|
|
case ONE:
|
2015-03-18 17:26:36 -04:00
|
|
|
// either NE or P
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
|
|
|
|
var reg Node
|
|
|
|
|
Regalloc(&reg, Types[TBOOL], nil)
|
|
|
|
|
Thearch.Ginsboolval(Thearch.Optoas(ONE, nr.Type), &reg)
|
|
|
|
|
Thearch.Ginsboolval(Thearch.Optoas(OPS, nr.Type), res)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OOR, Types[TBOOL]), &reg, res)
|
|
|
|
|
Regfree(&reg)
|
|
|
|
|
} else {
|
|
|
|
|
Patch(Gbranch(Thearch.Optoas(ONE, nr.Type), nil, likely), to)
|
|
|
|
|
Patch(Gbranch(Thearch.Optoas(OPS, nr.Type), nil, likely), to)
|
|
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
case '7', '9':
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if genval {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("genval 7g, 9g Isfloat special cases not implemented")
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
2015-04-06 19:36:36 -07:00
|
|
|
switch n.Op {
|
2015-03-18 17:26:36 -04:00
|
|
|
// On arm64 and ppc64, <= and >= mishandle NaN. Must decompose into < or > and =.
|
2015-04-06 19:36:36 -07:00
|
|
|
// TODO(josh): Convert a <= b to b > a instead?
|
|
|
|
|
case OLE, OGE:
|
2015-09-24 23:21:18 +02:00
|
|
|
if op == OLE {
|
|
|
|
|
op = OLT
|
2015-04-06 19:36:36 -07:00
|
|
|
} else {
|
2015-09-24 23:21:18 +02:00
|
|
|
op = OGT
|
2015-04-06 19:36:36 -07:00
|
|
|
}
|
2015-09-24 23:21:18 +02:00
|
|
|
Patch(Gbranch(Thearch.Optoas(op, nr.Type), nr.Type, likely), to)
|
2015-04-06 19:36:36 -07:00
|
|
|
Patch(Gbranch(Thearch.Optoas(OEQ, nr.Type), nr.Type, likely), to)
|
|
|
|
|
return
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
// Not a special case. Insert the conditional jump or value gen.
|
2015-09-24 23:21:18 +02:00
|
|
|
bins(nr.Type, res, op, likely, to)
|
2015-04-06 19:36:36 -07:00
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
func bgenNonZero(n, res *Node, wantTrue bool, likely int, to *obj.Prog) {
|
2015-03-18 17:26:36 -04:00
|
|
|
// TODO: Optimize on systems that can compare to zero easily.
|
2015-09-24 23:21:18 +02:00
|
|
|
var op Op = ONE
|
2015-04-06 19:36:36 -07:00
|
|
|
if !wantTrue {
|
2015-09-24 23:21:18 +02:00
|
|
|
op = OEQ
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-09-10 11:33:09 -04:00
|
|
|
|
|
|
|
|
// MIPS does not have CMP instruction
|
|
|
|
|
if Thearch.Thechar == '0' {
|
|
|
|
|
p := Gbranch(Thearch.Optoas(op, n.Type), n.Type, likely)
|
|
|
|
|
Naddr(&p.From, n)
|
|
|
|
|
Patch(p, to)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-06 19:36:36 -07:00
|
|
|
var zero Node
|
|
|
|
|
Nodconst(&zero, n.Type, 0)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OCMP, n.Type), n, &zero)
|
2015-09-24 23:21:18 +02:00
|
|
|
bins(n.Type, res, op, likely, to)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// bins inserts an instruction to handle the result of a compare.
|
|
|
|
|
// If res is non-nil, it inserts appropriate value generation instructions.
|
|
|
|
|
// If res is nil, it inserts a branch to to.
|
2015-09-24 23:21:18 +02:00
|
|
|
func bins(typ *Type, res *Node, op Op, likely int, to *obj.Prog) {
|
|
|
|
|
a := Thearch.Optoas(op, typ)
|
cmd/internal/gc, cmd/6g: generate boolean values without jumps
Use SETcc instructions instead of Jcc to generate boolean values.
This generates shorter, jump-free code, which may in turn enable other
peephole optimizations.
For example, given
func f(i, j int) bool {
return i == j
}
Before
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) JEQ 21
0x000f 00015 (x.go:4) MOVB $0, "".~r2+24(FP)
0x0014 00020 (x.go:4) RET
0x0015 00021 (x.go:4) MOVB $1, "".~r2+24(FP)
0x001a 00026 (x.go:4) JMP 20
After
"".f t=1 size=32 value=0 args=0x18 locals=0x0
0x0000 00000 (x.go:3) TEXT "".f(SB), $0-24
0x0000 00000 (x.go:3) FUNCDATA $0, gclocals·b4c25e9b09fd0cf9bb429dcefe91c353(SB)
0x0000 00000 (x.go:3) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB)
0x0000 00000 (x.go:4) MOVQ "".i+8(FP), BX
0x0005 00005 (x.go:4) MOVQ "".j+16(FP), BP
0x000a 00010 (x.go:4) CMPQ BX, BP
0x000d 00013 (x.go:4) SETEQ "".~r2+24(FP)
0x0012 00018 (x.go:4) RET
regexp benchmarks, best of 12 runs:
benchmark old ns/op new ns/op delta
BenchmarkNotOnePassShortB 782 733 -6.27%
BenchmarkLiteral 180 171 -5.00%
BenchmarkNotLiteral 2855 2721 -4.69%
BenchmarkMatchHard_32 2672 2557 -4.30%
BenchmarkMatchHard_1K 80182 76732 -4.30%
BenchmarkMatchEasy1_32M 76440180 73304748 -4.10%
BenchmarkMatchEasy1_32K 68798 66350 -3.56%
BenchmarkAnchoredLongMatch 482 465 -3.53%
BenchmarkMatchEasy1_1M 2373042 2292692 -3.39%
BenchmarkReplaceAll 2776 2690 -3.10%
BenchmarkNotOnePassShortA 1397 1360 -2.65%
BenchmarkMatchClass_InRange 3842 3742 -2.60%
BenchmarkMatchEasy0_32 125 122 -2.40%
BenchmarkMatchEasy0_32K 11414 11164 -2.19%
BenchmarkMatchEasy0_1K 668 654 -2.10%
BenchmarkAnchoredShortMatch 260 255 -1.92%
BenchmarkAnchoredLiteralShortNonMatch 164 161 -1.83%
BenchmarkOnePassShortB 623 612 -1.77%
BenchmarkOnePassShortA 801 788 -1.62%
BenchmarkMatchClass 4094 4033 -1.49%
BenchmarkMatchEasy0_32M 14078800 13890704 -1.34%
BenchmarkMatchHard_32K 4095844 4045820 -1.22%
BenchmarkMatchEasy1_1K 1663 1643 -1.20%
BenchmarkMatchHard_1M 131261708 129708215 -1.18%
BenchmarkMatchHard_32M 4210112412 4169292003 -0.97%
BenchmarkMatchMedium_32K 2460752 2438611 -0.90%
BenchmarkMatchEasy0_1M 422914 419672 -0.77%
BenchmarkMatchMedium_1M 78581121 78040160 -0.69%
BenchmarkMatchMedium_32M 2515287278 2498464906 -0.67%
BenchmarkMatchMedium_32 1754 1746 -0.46%
BenchmarkMatchMedium_1K 52105 52106 +0.00%
BenchmarkAnchoredLiteralLongNonMatch 185 185 +0.00%
BenchmarkMatchEasy1_32 107 107 +0.00%
BenchmarkOnePassLongNotPrefix 505 505 +0.00%
BenchmarkOnePassLongPrefix 147 147 +0.00%
The godoc binary is ~0.12% smaller after this CL.
Updates #5729.
toolstash -cmp passes for all architectures other than amd64 and amd64p32.
Other architectures can be done in follow-up CLs.
Change-Id: I0e167e259274b722958567fc0af83a17ca002da7
Reviewed-on: https://go-review.googlesource.com/2284
Reviewed-by: Russ Cox <rsc@golang.org>
2015-04-08 09:54:15 -07:00
|
|
|
if res != nil {
|
|
|
|
|
// value gen
|
|
|
|
|
Thearch.Ginsboolval(a, res)
|
|
|
|
|
} else {
|
|
|
|
|
// jump
|
|
|
|
|
Patch(Gbranch(a, typ, likely), to)
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-05-18 16:54:59 -04:00
|
|
|
// stkof returns n's offset from SP if n is on the stack
|
|
|
|
|
// (either a local variable or the return value from a function call
|
|
|
|
|
// or the arguments to a function call).
|
|
|
|
|
// If n is not on the stack, stkof returns -1000.
|
|
|
|
|
// If n is on the stack but in an unknown location
|
|
|
|
|
// (due to array index arithmetic), stkof returns +1000.
|
|
|
|
|
//
|
|
|
|
|
// NOTE(rsc): It is possible that the ODOT and OINDEX cases
|
|
|
|
|
// are not relevant here, since it shouldn't be possible for them
|
|
|
|
|
// to be involved in an overlapping copy. Only function results
|
|
|
|
|
// from one call and the arguments to the next can overlap in
|
|
|
|
|
// any non-trivial way. If they can be dropped, then this function
|
|
|
|
|
// becomes much simpler and also more trustworthy.
|
|
|
|
|
// The fact that it works at all today is probably due to the fact
|
|
|
|
|
// that ODOT and OINDEX are irrelevant.
|
2015-03-18 17:26:36 -04:00
|
|
|
func stkof(n *Node) int64 {
|
|
|
|
|
switch n.Op {
|
|
|
|
|
case OINDREG:
|
2015-05-18 16:54:59 -04:00
|
|
|
if n.Reg != int16(Thearch.REGSP) {
|
|
|
|
|
return -1000 // not on stack
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
return n.Xoffset
|
|
|
|
|
|
|
|
|
|
case ODOT:
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
if Isptr[t.Etype] {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := stkof(n.Left)
|
2015-05-18 16:54:59 -04:00
|
|
|
if off == -1000 || off == +1000 {
|
2015-03-18 17:26:36 -04:00
|
|
|
return off
|
|
|
|
|
}
|
|
|
|
|
return off + n.Xoffset
|
|
|
|
|
|
|
|
|
|
case OINDEX:
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
if !Isfixedarray(t) {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
off := stkof(n.Left)
|
2015-05-18 16:54:59 -04:00
|
|
|
if off == -1000 || off == +1000 {
|
2015-03-18 17:26:36 -04:00
|
|
|
return off
|
|
|
|
|
}
|
|
|
|
|
if Isconst(n.Right, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
return off + t.Type.Width*Mpgetfix(n.Right.Val().U.(*Mpint))
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
2015-05-18 16:54:59 -04:00
|
|
|
return +1000 // on stack but not sure exactly where
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
case OCALLMETH, OCALLINTER, OCALLFUNC:
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
if Isptr[t.Etype] {
|
|
|
|
|
t = t.Type
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var flist Iter
|
|
|
|
|
t = Structfirst(&flist, Getoutarg(t))
|
|
|
|
|
if t != nil {
|
2015-10-08 22:13:44 +13:00
|
|
|
return t.Width + Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// botch - probably failing to recognize address
|
|
|
|
|
// arithmetic on the above. eg INDEX and DOT
|
2015-05-18 16:54:59 -04:00
|
|
|
return -1000 // not on stack
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// block copy:
|
|
|
|
|
// memmove(&ns, &n, w);
|
|
|
|
|
// if wb is true, needs write barrier.
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
func sgen_wb(n *Node, ns *Node, w int64, wb bool) {
|
2015-03-18 17:26:36 -04:00
|
|
|
if Debug['g'] != 0 {
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
op := "sgen"
|
|
|
|
|
if wb {
|
|
|
|
|
op = "sgen-wb"
|
|
|
|
|
}
|
|
|
|
|
fmt.Printf("\n%s w=%d\n", op, w)
|
2015-03-18 17:26:36 -04:00
|
|
|
Dump("r", n)
|
|
|
|
|
Dump("res", ns)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if n.Ullman >= UINF && ns.Ullman >= UINF {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("sgen UINF")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if w < 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("sgen copy %d", w)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// If copying .args, that's all the results, so record definition sites
|
|
|
|
|
// for them for the liveness analysis.
|
|
|
|
|
if ns.Op == ONAME && ns.Sym.Name == ".args" {
|
2016-02-25 10:35:19 -08:00
|
|
|
for _, ln := range Curfn.Func.Dcl {
|
|
|
|
|
if ln.Class == PPARAMOUT {
|
|
|
|
|
Gvardef(ln)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Avoid taking the address for simple enough types.
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if componentgen_wb(n, ns, wb) {
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if w == 0 {
|
|
|
|
|
// evaluate side effects only
|
|
|
|
|
var nodr Node
|
|
|
|
|
Regalloc(&nodr, Types[Tptr], nil)
|
|
|
|
|
Agen(ns, &nodr)
|
|
|
|
|
Agen(n, &nodr)
|
|
|
|
|
Regfree(&nodr)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// offset on the stack
|
|
|
|
|
osrc := stkof(n)
|
|
|
|
|
odst := stkof(ns)
|
|
|
|
|
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
if odst != -1000 {
|
|
|
|
|
// on stack, write barrier not needed after all
|
|
|
|
|
wb = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if osrc != -1000 && odst != -1000 && (osrc == 1000 || odst == 1000) || wb && osrc != -1000 {
|
2015-03-18 17:26:36 -04:00
|
|
|
// osrc and odst both on stack, and at least one is in
|
2016-03-01 23:21:55 +00:00
|
|
|
// an unknown position. Could generate code to test
|
2015-03-18 17:26:36 -04:00
|
|
|
// for forward/backward copy, but instead just copy
|
|
|
|
|
// to a temporary location first.
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
//
|
|
|
|
|
// OR: write barrier needed and source is on stack.
|
|
|
|
|
// Invoking the write barrier will use the stack to prepare its call.
|
|
|
|
|
// Copy to temporary.
|
2015-03-18 17:26:36 -04:00
|
|
|
var tmp Node
|
|
|
|
|
Tempname(&tmp, n.Type)
|
cmd/internal/gc: emit write barriers at lower level
This is primarily preparation for inlining, not an optimization by itself,
but it still helps some.
name old new delta
BenchmarkBinaryTree17 18.2s × (0.99,1.01) 17.9s × (0.99,1.01) -1.57%
BenchmarkFannkuch11 4.44s × (1.00,1.00) 4.42s × (1.00,1.00) -0.40%
BenchmarkFmtFprintfEmpty 119ns × (0.95,1.02) 118ns × (0.96,1.02) ~
BenchmarkFmtFprintfString 501ns × (0.99,1.02) 486ns × (0.99,1.01) -2.89%
BenchmarkFmtFprintfInt 474ns × (0.99,1.00) 457ns × (0.99,1.01) -3.59%
BenchmarkFmtFprintfIntInt 792ns × (1.00,1.00) 768ns × (1.00,1.01) -3.03%
BenchmarkFmtFprintfPrefixedInt 574ns × (1.00,1.01) 584ns × (0.99,1.03) +1.83%
BenchmarkFmtFprintfFloat 749ns × (1.00,1.00) 739ns × (0.99,1.00) -1.34%
BenchmarkFmtManyArgs 2.94µs × (1.00,1.01) 2.77µs × (1.00,1.00) -5.76%
BenchmarkGobDecode 39.5ms × (0.99,1.01) 39.3ms × (0.99,1.01) ~
BenchmarkGobEncode 39.4ms × (1.00,1.01) 39.4ms × (0.99,1.00) ~
BenchmarkGzip 658ms × (1.00,1.01) 661ms × (0.99,1.01) ~
BenchmarkGunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) +0.22%
BenchmarkHTTPClientServer 134µs × (0.99,1.01) 133µs × (0.98,1.01) ~
BenchmarkJSONEncode 57.1ms × (0.99,1.01) 56.5ms × (0.99,1.01) ~
BenchmarkJSONDecode 141ms × (1.00,1.00) 143ms × (1.00,1.00) +1.09%
BenchmarkMandelbrot200 6.01ms × (1.00,1.00) 6.01ms × (1.00,1.00) ~
BenchmarkGoParse 10.1ms × (0.91,1.09) 9.6ms × (0.94,1.07) ~
BenchmarkRegexpMatchEasy0_32 207ns × (1.00,1.01) 210ns × (1.00,1.00) +1.45%
BenchmarkRegexpMatchEasy0_1K 592ns × (0.99,1.00) 596ns × (0.99,1.01) +0.68%
BenchmarkRegexpMatchEasy1_32 184ns × (0.99,1.01) 184ns × (0.99,1.01) ~
BenchmarkRegexpMatchEasy1_1K 1.01µs × (1.00,1.00) 1.01µs × (0.99,1.01) ~
BenchmarkRegexpMatchMedium_32 327ns × (0.99,1.00) 327ns × (1.00,1.01) ~
BenchmarkRegexpMatchMedium_1K 92.5µs × (1.00,1.00) 93.0µs × (1.00,1.02) +0.48%
BenchmarkRegexpMatchHard_32 4.79µs × (0.95,1.00) 4.76µs × (0.95,1.01) ~
BenchmarkRegexpMatchHard_1K 136µs × (1.00,1.00) 136µs × (1.00,1.01) ~
BenchmarkRevcomp 900ms × (0.99,1.01) 892ms × (1.00,1.01) ~
BenchmarkTemplate 170ms × (0.99,1.01) 175ms × (0.99,1.00) +2.95%
BenchmarkTimeParse 645ns × (1.00,1.00) 638ns × (1.00,1.00) -1.16%
BenchmarkTimeFormat 740ns × (1.00,1.00) 772ns × (1.00,1.00) +4.39%
Change-Id: I0be905e32791e0cb70ff01f169c4b309a971d981
Reviewed-on: https://go-review.googlesource.com/9159
Reviewed-by: Rick Hudson <rlh@golang.org>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-04-17 00:25:10 -04:00
|
|
|
sgen_wb(n, &tmp, w, false)
|
|
|
|
|
sgen_wb(&tmp, ns, w, wb)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if wb {
|
|
|
|
|
cgen_wbfat(n, ns)
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
2015-04-08 13:34:42 -04:00
|
|
|
Thearch.Blockcopy(n, ns, osrc, odst, w)
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// generate:
|
|
|
|
|
// call f
|
|
|
|
|
// proc=-1 normal call but no return
|
|
|
|
|
// proc=0 normal call
|
|
|
|
|
// proc=1 goroutine run in new proc
|
|
|
|
|
// proc=2 defer call save away stack
|
|
|
|
|
// proc=3 normal call to C pointer (not Go func value)
|
2015-03-18 17:26:36 -04:00
|
|
|
func Ginscall(f *Node, proc int) {
|
|
|
|
|
if f.Type != nil {
|
|
|
|
|
extra := int32(0)
|
|
|
|
|
if proc == 1 || proc == 2 {
|
|
|
|
|
extra = 2 * int32(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
Setmaxarg(f.Type, extra)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch proc {
|
|
|
|
|
default:
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("Ginscall: bad proc %d", proc)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
case 0, // normal call
|
|
|
|
|
-1: // normal call but no return
|
|
|
|
|
if f.Op == ONAME && f.Class == PFUNC {
|
|
|
|
|
if f == Deferreturn {
|
cmd/compile, cmd/link, runtime: on ppc64x, maintain the TOC pointer in R2 when compiling PIC
The PowerPC ISA does not have a PC-relative load instruction, which poses
obvious challenges when generating position-independent code. The way the ELFv2
ABI addresses this is to specify that r2 points to a per "module" (shared
library or executable) TOC pointer. Maintaining this pointer requires
cooperation between codegen and the system linker:
* Non-leaf functions leave space on the stack at r1+24 to save the TOC pointer.
* A call to a function that *might* have to go via a PLT stub must be followed
by a nop instruction that the system linker can replace with "ld r1, 24(r1)"
to restore the TOC pointer (only when dynamically linking Go code).
* When calling a function via a function pointer, the address of the function
must be in r12, and the first couple of instructions (the "global entry
point") of the called function use this to derive the address of the TOC
for the module it is in.
* When calling a function that is implemented in the same module, the system
linker adjusts the call to skip over the instructions mentioned above (the
"local entry point"), assuming that r2 is already correctly set.
So this changeset adds the global entry point instructions, sets the metadata so
the system linker knows where the local entry point is, inserts code to save the
TOC pointer at 24(r1), adds a nop after any call not known to be local and copes
with the odd non-local code transfer in the runtime (e.g. the stuff around
jmpdefer). It does not actually compile PIC yet.
Change-Id: I7522e22bdfd2f891745a900c60254fe9e372c854
Reviewed-on: https://go-review.googlesource.com/15967
Reviewed-by: Russ Cox <rsc@golang.org>
2015-10-16 15:42:09 +13:00
|
|
|
// Deferred calls will appear to be returning to the CALL
|
|
|
|
|
// deferreturn(SB) that we are about to emit. However, the
|
|
|
|
|
// stack scanning code will think that the instruction
|
|
|
|
|
// before the CALL is executing. To avoid the scanning
|
|
|
|
|
// code making bad assumptions (both cosmetic such as
|
|
|
|
|
// showing the wrong line number and fatal, such as being
|
|
|
|
|
// confused over whether a stack slot contains a pointer
|
|
|
|
|
// or a scalar) insert an actual hardware NOP that will
|
|
|
|
|
// have the right line number. This is different from
|
|
|
|
|
// obj.ANOP, which is a virtual no-op that doesn't make it
|
|
|
|
|
// into the instruction stream.
|
2015-03-18 17:26:36 -04:00
|
|
|
Thearch.Ginsnop()
|
cmd/compile, cmd/link, runtime: on ppc64x, maintain the TOC pointer in R2 when compiling PIC
The PowerPC ISA does not have a PC-relative load instruction, which poses
obvious challenges when generating position-independent code. The way the ELFv2
ABI addresses this is to specify that r2 points to a per "module" (shared
library or executable) TOC pointer. Maintaining this pointer requires
cooperation between codegen and the system linker:
* Non-leaf functions leave space on the stack at r1+24 to save the TOC pointer.
* A call to a function that *might* have to go via a PLT stub must be followed
by a nop instruction that the system linker can replace with "ld r1, 24(r1)"
to restore the TOC pointer (only when dynamically linking Go code).
* When calling a function via a function pointer, the address of the function
must be in r12, and the first couple of instructions (the "global entry
point") of the called function use this to derive the address of the TOC
for the module it is in.
* When calling a function that is implemented in the same module, the system
linker adjusts the call to skip over the instructions mentioned above (the
"local entry point"), assuming that r2 is already correctly set.
So this changeset adds the global entry point instructions, sets the metadata so
the system linker knows where the local entry point is, inserts code to save the
TOC pointer at 24(r1), adds a nop after any call not known to be local and copes
with the odd non-local code transfer in the runtime (e.g. the stuff around
jmpdefer). It does not actually compile PIC yet.
Change-Id: I7522e22bdfd2f891745a900c60254fe9e372c854
Reviewed-on: https://go-review.googlesource.com/15967
Reviewed-by: Russ Cox <rsc@golang.org>
2015-10-16 15:42:09 +13:00
|
|
|
|
|
|
|
|
if Thearch.Thechar == '9' {
|
|
|
|
|
// On ppc64, when compiling Go into position
|
|
|
|
|
// independent code on ppc64le we insert an
|
|
|
|
|
// instruction to reload the TOC pointer from the
|
|
|
|
|
// stack as well. See the long comment near
|
|
|
|
|
// jmpdefer in runtime/asm_ppc64.s for why.
|
|
|
|
|
// If the MOVD is not needed, insert a hardware NOP
|
|
|
|
|
// so that the same number of instructions are used
|
|
|
|
|
// on ppc64 in both shared and non-shared modes.
|
|
|
|
|
if Ctxt.Flag_shared != 0 {
|
|
|
|
|
p := Thearch.Gins(ppc64.AMOVD, nil, nil)
|
|
|
|
|
p.From.Type = obj.TYPE_MEM
|
|
|
|
|
p.From.Offset = 24
|
|
|
|
|
p.From.Reg = ppc64.REGSP
|
|
|
|
|
p.To.Type = obj.TYPE_REG
|
|
|
|
|
p.To.Reg = ppc64.REG_R2
|
|
|
|
|
} else {
|
|
|
|
|
Thearch.Ginsnop()
|
|
|
|
|
}
|
|
|
|
|
}
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
p := Thearch.Gins(obj.ACALL, nil, f)
|
|
|
|
|
Afunclit(&p.To, f)
|
|
|
|
|
if proc == -1 || Noreturn(p) {
|
|
|
|
|
Thearch.Gins(obj.AUNDEF, nil, nil)
|
|
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var reg Node
|
|
|
|
|
Nodreg(®, Types[Tptr], Thearch.REGCTXT)
|
|
|
|
|
var r1 Node
|
|
|
|
|
Nodreg(&r1, Types[Tptr], Thearch.REGCALLX)
|
|
|
|
|
Thearch.Gmove(f, ®)
|
|
|
|
|
reg.Op = OINDREG
|
|
|
|
|
Thearch.Gmove(®, &r1)
|
|
|
|
|
reg.Op = OREGISTER
|
|
|
|
|
Thearch.Gins(obj.ACALL, ®, &r1)
|
|
|
|
|
|
|
|
|
|
case 3: // normal call of c function pointer
|
|
|
|
|
Thearch.Gins(obj.ACALL, nil, f)
|
|
|
|
|
|
|
|
|
|
case 1, // call in new proc (go)
|
|
|
|
|
2: // deferred call (defer)
|
|
|
|
|
var stk Node
|
|
|
|
|
|
|
|
|
|
// size of arguments at 0(SP)
|
|
|
|
|
stk.Op = OINDREG
|
2015-04-13 10:28:57 -07:00
|
|
|
stk.Reg = int16(Thearch.REGSP)
|
2015-10-08 22:13:44 +13:00
|
|
|
stk.Xoffset = Ctxt.FixedFrameSize()
|
2015-05-02 04:36:53 -04:00
|
|
|
Thearch.Ginscon(Thearch.Optoas(OAS, Types[TINT32]), int64(Argsize(f.Type)), &stk)
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
// FuncVal* at 8(SP)
|
2015-10-08 22:13:44 +13:00
|
|
|
stk.Xoffset = int64(Widthptr) + Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
var reg Node
|
|
|
|
|
Nodreg(®, Types[Tptr], Thearch.REGCALLX2)
|
|
|
|
|
Thearch.Gmove(f, ®)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), ®, &stk)
|
|
|
|
|
|
|
|
|
|
if proc == 1 {
|
|
|
|
|
Ginscall(Newproc, 0)
|
|
|
|
|
} else {
|
2015-09-07 22:19:30 +02:00
|
|
|
if !hasdefer {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("hasdefer=0 but has defer")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
Ginscall(Deferproc, 0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if proc == 2 {
|
|
|
|
|
Nodreg(®, Types[TINT32], Thearch.REGRETURN)
|
2015-05-06 12:28:19 -04:00
|
|
|
p := Thearch.Ginscmp(OEQ, Types[TINT32], ®, Nodintconst(0), +1)
|
2015-03-18 17:26:36 -04:00
|
|
|
cgen_ret(nil)
|
|
|
|
|
Patch(p, Pc)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// n is call to interface method.
|
|
|
|
|
// generate res = n.
|
2015-03-18 17:26:36 -04:00
|
|
|
func cgen_callinter(n *Node, res *Node, proc int) {
|
|
|
|
|
i := n.Left
|
|
|
|
|
if i.Op != ODOTINTER {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_callinter: not ODOTINTER %v", Oconv(int(i.Op), 0))
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
f := i.Right // field
|
|
|
|
|
if f.Op != ONAME {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_callinter: not ONAME %v", Oconv(int(f.Op), 0))
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
i = i.Left // interface
|
|
|
|
|
|
2015-04-02 19:58:37 -07:00
|
|
|
if !i.Addable {
|
2015-03-18 17:26:36 -04:00
|
|
|
var tmpi Node
|
|
|
|
|
Tempname(&tmpi, i.Type)
|
|
|
|
|
Cgen(i, &tmpi)
|
|
|
|
|
i = &tmpi
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Genlist(n.List) // assign the args
|
|
|
|
|
|
|
|
|
|
// i is now addable, prepare an indirected
|
|
|
|
|
// register to hold its address.
|
|
|
|
|
var nodi Node
|
|
|
|
|
Igen(i, &nodi, res) // REG = &inter
|
|
|
|
|
|
|
|
|
|
var nodsp Node
|
|
|
|
|
Nodindreg(&nodsp, Types[Tptr], Thearch.REGSP)
|
2015-10-08 22:13:44 +13:00
|
|
|
nodsp.Xoffset = Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
if proc != 0 {
|
|
|
|
|
nodsp.Xoffset += 2 * int64(Widthptr) // leave room for size & fn
|
|
|
|
|
}
|
|
|
|
|
nodi.Type = Types[Tptr]
|
|
|
|
|
nodi.Xoffset += int64(Widthptr)
|
|
|
|
|
Cgen(&nodi, &nodsp) // {0, 8(nacl), or 16}(SP) = 8(REG) -- i.data
|
|
|
|
|
|
|
|
|
|
var nodo Node
|
|
|
|
|
Regalloc(&nodo, Types[Tptr], res)
|
|
|
|
|
|
|
|
|
|
nodi.Type = Types[Tptr]
|
|
|
|
|
nodi.Xoffset -= int64(Widthptr)
|
|
|
|
|
Cgen(&nodi, &nodo) // REG = 0(REG) -- i.tab
|
|
|
|
|
Regfree(&nodi)
|
|
|
|
|
|
|
|
|
|
var nodr Node
|
|
|
|
|
Regalloc(&nodr, Types[Tptr], &nodo)
|
|
|
|
|
if n.Left.Xoffset == BADWIDTH {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_callinter: badwidth")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
Cgen_checknil(&nodo) // in case offset is huge
|
|
|
|
|
nodo.Op = OINDREG
|
|
|
|
|
nodo.Xoffset = n.Left.Xoffset + 3*int64(Widthptr) + 8
|
|
|
|
|
if proc == 0 {
|
|
|
|
|
// plain call: use direct c function pointer - more efficient
|
|
|
|
|
Cgen(&nodo, &nodr) // REG = 32+offset(REG) -- i.tab->fun[f]
|
|
|
|
|
proc = 3
|
|
|
|
|
} else {
|
|
|
|
|
// go/defer. generate go func value.
|
|
|
|
|
Agen(&nodo, &nodr) // REG = &(32+offset(REG)) -- i.tab->fun[f]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
nodr.Type = n.Left.Type
|
|
|
|
|
Ginscall(&nodr, proc)
|
|
|
|
|
|
|
|
|
|
Regfree(&nodr)
|
|
|
|
|
Regfree(&nodo)
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// generate function call;
|
|
|
|
|
// proc=0 normal call
|
|
|
|
|
// proc=1 goroutine run in new proc
|
|
|
|
|
// proc=2 defer call save away stack
|
2015-03-18 17:26:36 -04:00
|
|
|
func cgen_call(n *Node, proc int) {
|
|
|
|
|
if n == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var afun Node
|
|
|
|
|
if n.Left.Ullman >= UINF {
|
|
|
|
|
// if name involves a fn call
|
|
|
|
|
// precompute the address of the fn
|
|
|
|
|
Tempname(&afun, Types[Tptr])
|
|
|
|
|
|
|
|
|
|
Cgen(n.Left, &afun)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Genlist(n.List) // assign the args
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
|
|
|
|
|
// call tempname pointer
|
|
|
|
|
if n.Left.Ullman >= UINF {
|
|
|
|
|
var nod Node
|
|
|
|
|
Regalloc(&nod, Types[Tptr], nil)
|
|
|
|
|
Cgen_as(&nod, &afun)
|
|
|
|
|
nod.Type = t
|
|
|
|
|
Ginscall(&nod, proc)
|
|
|
|
|
Regfree(&nod)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// call pointer
|
|
|
|
|
if n.Left.Op != ONAME || n.Left.Class != PFUNC {
|
|
|
|
|
var nod Node
|
|
|
|
|
Regalloc(&nod, Types[Tptr], nil)
|
|
|
|
|
Cgen_as(&nod, n.Left)
|
|
|
|
|
nod.Type = t
|
|
|
|
|
Ginscall(&nod, proc)
|
|
|
|
|
Regfree(&nod)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// call direct
|
2015-05-15 10:02:19 -07:00
|
|
|
n.Left.Name.Method = true
|
2015-03-18 17:26:36 -04:00
|
|
|
|
|
|
|
|
Ginscall(n.Left, proc)
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// call to n has already been generated.
|
|
|
|
|
// generate:
|
|
|
|
|
// res = return value from call.
|
2015-03-18 17:26:36 -04:00
|
|
|
func cgen_callret(n *Node, res *Node) {
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
if t.Etype == TPTR32 || t.Etype == TPTR64 {
|
|
|
|
|
t = t.Type
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var flist Iter
|
|
|
|
|
fp := Structfirst(&flist, Getoutarg(t))
|
|
|
|
|
if fp == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_callret: nil")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var nod Node
|
|
|
|
|
nod.Op = OINDREG
|
2015-04-13 10:28:57 -07:00
|
|
|
nod.Reg = int16(Thearch.REGSP)
|
2015-04-02 19:58:37 -07:00
|
|
|
nod.Addable = true
|
2015-03-18 17:26:36 -04:00
|
|
|
|
2015-10-08 22:13:44 +13:00
|
|
|
nod.Xoffset = fp.Width + Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
nod.Type = fp.Type
|
|
|
|
|
Cgen_as(res, &nod)
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// call to n has already been generated.
|
|
|
|
|
// generate:
|
|
|
|
|
// res = &return value from call.
|
2015-03-18 17:26:36 -04:00
|
|
|
func cgen_aret(n *Node, res *Node) {
|
|
|
|
|
t := n.Left.Type
|
|
|
|
|
if Isptr[t.Etype] {
|
|
|
|
|
t = t.Type
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var flist Iter
|
|
|
|
|
fp := Structfirst(&flist, Getoutarg(t))
|
|
|
|
|
if fp == nil {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("cgen_aret: nil")
|
2015-03-18 17:26:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var nod1 Node
|
|
|
|
|
nod1.Op = OINDREG
|
2015-04-13 10:28:57 -07:00
|
|
|
nod1.Reg = int16(Thearch.REGSP)
|
2015-04-02 19:58:37 -07:00
|
|
|
nod1.Addable = true
|
2015-10-08 22:13:44 +13:00
|
|
|
nod1.Xoffset = fp.Width + Ctxt.FixedFrameSize()
|
2015-03-18 17:26:36 -04:00
|
|
|
nod1.Type = fp.Type
|
|
|
|
|
|
|
|
|
|
if res.Op != OREGISTER {
|
|
|
|
|
var nod2 Node
|
|
|
|
|
Regalloc(&nod2, Types[Tptr], res)
|
|
|
|
|
Agen(&nod1, &nod2)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OAS, Types[Tptr]), &nod2, res)
|
|
|
|
|
Regfree(&nod2)
|
|
|
|
|
} else {
|
|
|
|
|
Agen(&nod1, res)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// generate return.
|
|
|
|
|
// n->left is assignments to return values.
|
2015-03-18 17:26:36 -04:00
|
|
|
func cgen_ret(n *Node) {
|
|
|
|
|
if n != nil {
|
|
|
|
|
Genlist(n.List) // copy out args
|
|
|
|
|
}
|
2015-09-07 22:19:30 +02:00
|
|
|
if hasdefer {
|
2015-03-18 17:26:36 -04:00
|
|
|
Ginscall(Deferreturn, 0)
|
|
|
|
|
}
|
2016-03-03 15:08:25 -08:00
|
|
|
Genlist(Curfn.Func.Exit)
|
2015-03-18 17:26:36 -04:00
|
|
|
p := Thearch.Gins(obj.ARET, nil, nil)
|
|
|
|
|
if n != nil && n.Op == ORETJMP {
|
|
|
|
|
p.To.Type = obj.TYPE_MEM
|
|
|
|
|
p.To.Name = obj.NAME_EXTERN
|
|
|
|
|
p.To.Sym = Linksym(n.Left.Sym)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-22 09:51:12 +09:00
|
|
|
// generate division according to op, one of:
|
|
|
|
|
// res = nl / nr
|
|
|
|
|
// res = nl % nr
|
2015-09-24 23:21:18 +02:00
|
|
|
func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
|
2015-03-18 17:26:36 -04:00
|
|
|
var w int
|
|
|
|
|
|
|
|
|
|
// TODO(rsc): arm64 needs to support the relevant instructions
|
|
|
|
|
// in peep and optoas in order to enable this.
|
|
|
|
|
// TODO(rsc): ppc64 needs to support the relevant instructions
|
|
|
|
|
// in peep and optoas in order to enable this.
|
2015-09-10 11:33:09 -04:00
|
|
|
if nr.Op != OLITERAL || Ctxt.Arch.Thechar == '0' || Ctxt.Arch.Thechar == '7' || Ctxt.Arch.Thechar == '9' {
|
2015-03-18 17:26:36 -04:00
|
|
|
goto longdiv
|
|
|
|
|
}
|
|
|
|
|
w = int(nl.Type.Width * 8)
|
|
|
|
|
|
|
|
|
|
// Front end handled 32-bit division. We only need to handle 64-bit.
|
|
|
|
|
// try to do division by multiply by (2^w)/d
|
|
|
|
|
// see hacker's delight chapter 10
|
|
|
|
|
switch Simtype[nl.Type.Etype] {
|
|
|
|
|
default:
|
|
|
|
|
goto longdiv
|
|
|
|
|
|
|
|
|
|
case TUINT64:
|
|
|
|
|
var m Magic
|
|
|
|
|
m.W = w
|
2015-05-27 00:47:05 -04:00
|
|
|
m.Ud = uint64(Mpgetfix(nr.Val().U.(*Mpint)))
|
2015-03-18 17:26:36 -04:00
|
|
|
Umagic(&m)
|
|
|
|
|
if m.Bad != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if op == OMOD {
|
|
|
|
|
goto longmod
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Cgenr(nl, &n1, nil)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, nl.Type, int64(m.Um))
|
|
|
|
|
var n3 Node
|
|
|
|
|
Regalloc(&n3, nl.Type, res)
|
|
|
|
|
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
|
|
|
|
|
|
|
|
|
if m.Ua != 0 {
|
|
|
|
|
// need to add numerator accounting for overflow
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
|
|
|
|
|
|
|
|
|
Nodconst(&n2, nl.Type, 1)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(ORROTC, nl.Type), &n2, &n3)
|
|
|
|
|
Nodconst(&n2, nl.Type, int64(m.S)-1)
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3)
|
|
|
|
|
} else {
|
|
|
|
|
Nodconst(&n2, nl.Type, int64(m.S))
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3) // shift dx
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(&n3, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
Regfree(&n3)
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
case TINT64:
|
|
|
|
|
var m Magic
|
|
|
|
|
m.W = w
|
2015-05-27 00:47:05 -04:00
|
|
|
m.Sd = Mpgetfix(nr.Val().U.(*Mpint))
|
2015-03-18 17:26:36 -04:00
|
|
|
Smagic(&m)
|
|
|
|
|
if m.Bad != 0 {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if op == OMOD {
|
|
|
|
|
goto longmod
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var n1 Node
|
|
|
|
|
Cgenr(nl, &n1, res)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Nodconst(&n2, nl.Type, m.Sm)
|
|
|
|
|
var n3 Node
|
|
|
|
|
Regalloc(&n3, nl.Type, nil)
|
|
|
|
|
Thearch.Cgen_hmul(&n1, &n2, &n3)
|
|
|
|
|
|
|
|
|
|
if m.Sm < 0 {
|
|
|
|
|
// need to add numerator
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Nodconst(&n2, nl.Type, int64(m.S))
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n3) // shift n3
|
|
|
|
|
|
|
|
|
|
Nodconst(&n2, nl.Type, int64(w)-1)
|
|
|
|
|
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(ORSH, nl.Type), &n2, &n1) // -1 iff num is neg
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
|
|
|
|
|
|
|
|
|
|
if m.Sd < 0 {
|
|
|
|
|
// this could probably be removed
|
|
|
|
|
// by factoring it into the multiplier
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Thearch.Gmove(&n3, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
Regfree(&n3)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
goto longdiv
|
|
|
|
|
|
|
|
|
|
// division and mod using (slow) hardware instruction
|
|
|
|
|
longdiv:
|
|
|
|
|
Thearch.Dodiv(op, nl, nr, res)
|
|
|
|
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
// mod using formula A%B = A-(A/B*B) but
|
|
|
|
|
// we know that there is a fast algorithm for A/B
|
|
|
|
|
longmod:
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, nl.Type, res)
|
|
|
|
|
|
|
|
|
|
Cgen(nl, &n1)
|
|
|
|
|
var n2 Node
|
|
|
|
|
Regalloc(&n2, nl.Type, nil)
|
|
|
|
|
cgen_div(ODIV, &n1, nr, &n2)
|
|
|
|
|
a := Thearch.Optoas(OMUL, nl.Type)
|
|
|
|
|
if w == 8 {
|
|
|
|
|
// use 2-operand 16-bit multiply
|
|
|
|
|
// because there is no 2-operand 8-bit multiply
|
|
|
|
|
a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !Smallintconst(nr) {
|
|
|
|
|
var n3 Node
|
|
|
|
|
Regalloc(&n3, nl.Type, nil)
|
|
|
|
|
Cgen(nr, &n3)
|
|
|
|
|
Thearch.Gins(a, &n3, &n2)
|
|
|
|
|
Regfree(&n3)
|
|
|
|
|
} else {
|
|
|
|
|
Thearch.Gins(a, nr, &n2)
|
|
|
|
|
}
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n2, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, res)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
Regfree(&n2)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func Fixlargeoffset(n *Node) {
|
|
|
|
|
if n == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if n.Op != OINDREG {
|
|
|
|
|
return
|
|
|
|
|
}
|
2015-04-13 10:28:57 -07:00
|
|
|
if n.Reg == int16(Thearch.REGSP) { // stack offset cannot be large
|
2015-03-18 17:26:36 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if n.Xoffset != int64(int32(n.Xoffset)) {
|
|
|
|
|
// offset too large, add to register instead.
|
|
|
|
|
a := *n
|
|
|
|
|
|
|
|
|
|
a.Op = OREGISTER
|
|
|
|
|
a.Type = Types[Tptr]
|
|
|
|
|
a.Xoffset = 0
|
|
|
|
|
Cgen_checknil(&a)
|
|
|
|
|
Thearch.Ginscon(Thearch.Optoas(OADD, Types[Tptr]), n.Xoffset, &a)
|
|
|
|
|
n.Xoffset = 0
|
|
|
|
|
}
|
|
|
|
|
}
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
|
|
|
|
|
func cgen_append(n, res *Node) {
|
|
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("cgen_append-n", n)
|
|
|
|
|
Dump("cgen_append-res", res)
|
|
|
|
|
}
|
2016-03-03 15:08:25 -08:00
|
|
|
if res.Op != ONAME && !samesafeexpr(res, nodeSeqFirst(n.List)) {
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Dump("cgen_append-n", n)
|
|
|
|
|
Dump("cgen_append-res", res)
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("append not lowered")
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
}
|
2016-03-03 15:08:25 -08:00
|
|
|
for it := nodeSeqIterate(n.List); !it.Done(); it.Next() {
|
|
|
|
|
if it.N().Ullman >= UINF {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("append with function call arguments")
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// res = append(src, x, y, z)
|
|
|
|
|
//
|
|
|
|
|
// If res and src are the same, we can avoid writing to base and cap
|
|
|
|
|
// unless we grow the underlying array.
|
2016-03-03 15:08:25 -08:00
|
|
|
needFullUpdate := !samesafeexpr(res, nodeSeqFirst(n.List))
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
|
|
|
|
|
// Copy src triple into base, len, cap.
|
|
|
|
|
base := temp(Types[Tptr])
|
|
|
|
|
len := temp(Types[TUINT])
|
|
|
|
|
cap := temp(Types[TUINT])
|
|
|
|
|
|
|
|
|
|
var src Node
|
2016-03-03 15:08:25 -08:00
|
|
|
Igen(nodeSeqFirst(n.List), &src, nil)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
src.Type = Types[Tptr]
|
|
|
|
|
Thearch.Gmove(&src, base)
|
|
|
|
|
src.Type = Types[TUINT]
|
|
|
|
|
src.Xoffset += int64(Widthptr)
|
|
|
|
|
Thearch.Gmove(&src, len)
|
|
|
|
|
src.Xoffset += int64(Widthptr)
|
|
|
|
|
Thearch.Gmove(&src, cap)
|
|
|
|
|
|
|
|
|
|
// if len+argc <= cap goto L1
|
|
|
|
|
var rlen Node
|
|
|
|
|
Regalloc(&rlen, Types[TUINT], nil)
|
|
|
|
|
Thearch.Gmove(len, &rlen)
|
2016-03-03 15:08:25 -08:00
|
|
|
Thearch.Ginscon(Thearch.Optoas(OADD, Types[TUINT]), int64(nodeSeqLen(n.List)-1), &rlen)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
p := Thearch.Ginscmp(OLE, Types[TUINT], &rlen, cap, +1)
|
|
|
|
|
// Note: rlen and src are Regrealloc'ed below at the target of the
|
|
|
|
|
// branch we just emitted; do not reuse these Go variables for
|
|
|
|
|
// other purposes. They need to still describe the same things
|
|
|
|
|
// below that they describe right here.
|
|
|
|
|
Regfree(&src)
|
|
|
|
|
|
|
|
|
|
// base, len, cap = growslice(type, base, len, cap, newlen)
|
|
|
|
|
var arg Node
|
|
|
|
|
arg.Op = OINDREG
|
|
|
|
|
arg.Reg = int16(Thearch.REGSP)
|
|
|
|
|
arg.Addable = true
|
2015-10-08 22:13:44 +13:00
|
|
|
arg.Xoffset = Ctxt.FixedFrameSize()
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
arg.Type = Ptrto(Types[TUINT8])
|
|
|
|
|
Cgen(typename(res.Type), &arg)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[Tptr]
|
|
|
|
|
Cgen(base, &arg)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[TUINT]
|
|
|
|
|
Cgen(len, &arg)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[TUINT]
|
|
|
|
|
Cgen(cap, &arg)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[TUINT]
|
|
|
|
|
Cgen(&rlen, &arg)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
Regfree(&rlen)
|
|
|
|
|
|
2016-03-04 15:19:06 -08:00
|
|
|
fn := syslook("growslice")
|
|
|
|
|
substArgTypes(&fn, res.Type.Type, res.Type.Type)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Ginscall(fn, 0)
|
|
|
|
|
|
|
|
|
|
if Widthptr == 4 && Widthreg == 8 {
|
|
|
|
|
arg.Xoffset += 4
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[Tptr]
|
|
|
|
|
Cgen(&arg, base)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[TUINT]
|
|
|
|
|
Cgen(&arg, len)
|
|
|
|
|
arg.Xoffset += int64(Widthptr)
|
|
|
|
|
|
|
|
|
|
arg.Type = Types[TUINT]
|
|
|
|
|
Cgen(&arg, cap)
|
|
|
|
|
|
|
|
|
|
// Update res with base, len+argc, cap.
|
|
|
|
|
if needFullUpdate {
|
|
|
|
|
if Debug_append > 0 {
|
|
|
|
|
Warn("append: full update")
|
|
|
|
|
}
|
|
|
|
|
Patch(p, Pc)
|
|
|
|
|
}
|
|
|
|
|
if res.Op == ONAME {
|
|
|
|
|
Gvardef(res)
|
|
|
|
|
}
|
|
|
|
|
var dst, r1 Node
|
|
|
|
|
Igen(res, &dst, nil)
|
|
|
|
|
dst.Type = Types[TUINT]
|
|
|
|
|
dst.Xoffset += int64(Widthptr)
|
|
|
|
|
Regalloc(&r1, Types[TUINT], nil)
|
|
|
|
|
Thearch.Gmove(len, &r1)
|
2016-03-03 15:08:25 -08:00
|
|
|
Thearch.Ginscon(Thearch.Optoas(OADD, Types[TUINT]), int64(nodeSeqLen(n.List)-1), &r1)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Thearch.Gmove(&r1, &dst)
|
|
|
|
|
Regfree(&r1)
|
|
|
|
|
dst.Xoffset += int64(Widthptr)
|
|
|
|
|
Thearch.Gmove(cap, &dst)
|
|
|
|
|
dst.Type = Types[Tptr]
|
|
|
|
|
dst.Xoffset -= 2 * int64(Widthptr)
|
|
|
|
|
cgen_wb(base, &dst, needwritebarrier(&dst, base))
|
|
|
|
|
Regfree(&dst)
|
|
|
|
|
|
|
|
|
|
if !needFullUpdate {
|
|
|
|
|
if Debug_append > 0 {
|
|
|
|
|
Warn("append: len-only update")
|
|
|
|
|
}
|
|
|
|
|
// goto L2;
|
|
|
|
|
// L1:
|
|
|
|
|
// update len only
|
|
|
|
|
// L2:
|
|
|
|
|
q := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
Patch(p, Pc)
|
|
|
|
|
// At the goto above, src refers to cap and rlen holds the new len
|
|
|
|
|
if src.Op == OREGISTER || src.Op == OINDREG {
|
|
|
|
|
Regrealloc(&src)
|
|
|
|
|
}
|
|
|
|
|
Regrealloc(&rlen)
|
|
|
|
|
src.Xoffset -= int64(Widthptr)
|
|
|
|
|
Thearch.Gmove(&rlen, &src)
|
|
|
|
|
Regfree(&src)
|
|
|
|
|
Regfree(&rlen)
|
|
|
|
|
Patch(q, Pc)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Copy data into place.
|
|
|
|
|
// Could do write barrier check around entire copy instead of each element.
|
|
|
|
|
// Could avoid reloading registers on each iteration if we know the cgen_wb
|
|
|
|
|
// is not going to use a write barrier.
|
|
|
|
|
i := 0
|
|
|
|
|
var r2 Node
|
2016-03-03 15:08:25 -08:00
|
|
|
it := nodeSeqIterate(n.List)
|
|
|
|
|
it.Next()
|
|
|
|
|
for ; !it.Done(); it.Next() {
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Regalloc(&r1, Types[Tptr], nil)
|
|
|
|
|
Thearch.Gmove(base, &r1)
|
|
|
|
|
Regalloc(&r2, Types[TUINT], nil)
|
|
|
|
|
Thearch.Gmove(len, &r2)
|
|
|
|
|
if i > 0 {
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, Types[TUINT]), Nodintconst(int64(i)), &r2)
|
|
|
|
|
}
|
|
|
|
|
w := res.Type.Type.Width
|
|
|
|
|
if Thearch.AddIndex != nil && Thearch.AddIndex(&r2, w, &r1) {
|
|
|
|
|
// r1 updated by back end
|
|
|
|
|
} else if w == 1 {
|
|
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &r2, &r1)
|
|
|
|
|
} else {
|
2016-03-03 15:49:04 -08:00
|
|
|
Thearch.Ginscon(Thearch.Optoas(OMUL, Types[TUINT]), w, &r2)
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Thearch.Gins(Thearch.Optoas(OADD, Types[Tptr]), &r2, &r1)
|
|
|
|
|
}
|
|
|
|
|
Regfree(&r2)
|
|
|
|
|
|
|
|
|
|
r1.Op = OINDREG
|
|
|
|
|
r1.Type = res.Type.Type
|
2016-03-03 15:08:25 -08:00
|
|
|
cgen_wb(it.N(), &r1, needwritebarrier(&r1, it.N()))
|
cmd/internal/gc: optimize append + write barrier
The code generated for x = append(x, v) is roughly:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
}
t[len(t)] = v
len(t)++
x = t
We used to generate this code as Go pseudocode during walk.
Generate it instead as actual instructions during gen.
Doing so lets us apply a few optimizations. The most important
is that when, as in the above example, the source slice and the
destination slice are the same, the code can instead do:
t := x
if len(t)+1 > cap(t) {
t = grow(t)
x = {base(t), len(t)+1, cap(t)}
} else {
len(x)++
}
t[len(t)] = v
That is, in the fast path that does not reallocate the array,
only the updated length needs to be written back to x,
not the array pointer and not the capacity. This is more like
what you'd write by hand in C. It's faster in general, since
the fast path elides two of the three stores, but it's especially
faster when the form of x is such that the base pointer write
would turn into a write barrier. No write, no barrier.
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.81s × (0.98,1.03) +2.35% (p=0.023)
Fannkuch11 4.41s × (0.98,1.03) 4.35s × (1.00,1.00) ~ (p=0.090)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 86.0ns × (0.94,1.11) -7.31% (p=0.038)
FmtFprintfString 281ns × (0.96,1.08) 276ns × (0.98,1.04) ~ (p=0.219)
FmtFprintfInt 288ns × (0.97,1.06) 274ns × (0.98,1.06) -4.94% (p=0.002)
FmtFprintfIntInt 493ns × (0.97,1.04) 506ns × (0.99,1.01) +2.65% (p=0.009)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 391ns × (0.99,1.01) -7.52% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 566ns × (0.99,1.01) -5.27% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.91µs × (0.99,1.01) ~ (p=0.231)
GobDecode 14.8ms × (0.98,1.03) 15.3ms × (0.99,1.02) +3.01% (p=0.000)
GobEncode 12.3ms × (0.98,1.01) 11.5ms × (0.97,1.03) -5.93% (p=0.000)
Gzip 656ms × (0.99,1.05) 645ms × (0.99,1.01) ~ (p=0.055)
Gunzip 142ms × (1.00,1.00) 142ms × (1.00,1.00) -0.32% (p=0.034)
HTTPClientServer 91.2µs × (0.97,1.04) 90.5µs × (0.97,1.04) ~ (p=0.468)
JSONEncode 32.6ms × (0.97,1.08) 32.0ms × (0.98,1.03) ~ (p=0.190)
JSONDecode 114ms × (0.97,1.05) 114ms × (0.99,1.01) ~ (p=0.887)
Mandelbrot200 6.11ms × (0.98,1.04) 6.04ms × (1.00,1.01) ~ (p=0.167)
GoParse 6.66ms × (0.97,1.04) 6.47ms × (0.97,1.05) -2.81% (p=0.014)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 171ns × (0.93,1.07) +7.19% (p=0.002)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 550ns × (0.98,1.01) +2.30% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 135ns × (0.99,1.02) -1.60% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 879ns × (1.00,1.01) +1.08% (p=0.000)
RegexpMatchMedium_32 252ns × (0.99,1.01) 243ns × (1.00,1.00) -3.71% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 70.3µs × (1.00,1.00) -3.34% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.82µs × (1.00,1.01) -0.81% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 117µs × (1.00,1.00) -0.56% (p=0.000)
Revcomp 920ms × (0.97,1.07) 917ms × (0.97,1.04) ~ (p=0.808)
Template 129ms × (0.98,1.03) 114ms × (0.99,1.01) -12.06% (p=0.000)
TimeParse 619ns × (0.99,1.01) 622ns × (0.99,1.01) ~ (p=0.062)
TimeFormat 661ns × (0.98,1.04) 665ns × (0.99,1.01) ~ (p=0.524)
See next CL for combination with a similar optimization for slice.
The benchmarks that are slower in this CL are still faster overall
with the combination of the two.
Change-Id: I2a7421658091b2488c64741b4db15ab6c3b4cb7e
Reviewed-on: https://go-review.googlesource.com/9812
Reviewed-by: David Chase <drchase@google.com>
2015-05-06 12:34:30 -04:00
|
|
|
Regfree(&r1)
|
|
|
|
|
i++
|
|
|
|
|
}
|
|
|
|
|
}
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
|
|
|
|
|
// Generate res = n, where n is x[i:j] or x[i:j:k].
|
|
|
|
|
// If wb is true, need write barrier updating res's base pointer.
|
|
|
|
|
// On systems with 32-bit ints, i, j, k are guaranteed to be 32-bit values.
|
|
|
|
|
func cgen_slice(n, res *Node, wb bool) {
|
2015-05-19 03:39:30 -04:00
|
|
|
if Debug['g'] != 0 {
|
|
|
|
|
Dump("cgen_slice-n", n)
|
|
|
|
|
Dump("cgen_slice-res", res)
|
|
|
|
|
}
|
|
|
|
|
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
needFullUpdate := !samesafeexpr(n.Left, res)
|
|
|
|
|
|
|
|
|
|
// orderexpr has made sure that x is safe (but possibly expensive)
|
|
|
|
|
// and i, j, k are cheap. On a system with registers (anything but 386)
|
|
|
|
|
// we can evaluate x first and then know we have enough registers
|
|
|
|
|
// for i, j, k as well.
|
|
|
|
|
var x, xbase, xlen, xcap, i, j, k Node
|
|
|
|
|
if n.Op != OSLICEARR && n.Op != OSLICE3ARR {
|
|
|
|
|
Igen(n.Left, &x, nil)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
indexRegType := Types[TUINT]
|
|
|
|
|
if Widthreg > Widthptr { // amd64p32
|
|
|
|
|
indexRegType = Types[TUINT64]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// On most systems, we use registers.
|
|
|
|
|
// The 386 has basically no registers, so substitute functions
|
|
|
|
|
// that can work with temporaries instead.
|
|
|
|
|
regalloc := Regalloc
|
|
|
|
|
ginscon := Thearch.Ginscon
|
|
|
|
|
gins := Thearch.Gins
|
|
|
|
|
if Thearch.Thechar == '8' {
|
|
|
|
|
regalloc = func(n *Node, t *Type, reuse *Node) {
|
|
|
|
|
Tempname(n, t)
|
|
|
|
|
}
|
|
|
|
|
ginscon = func(as int, c int64, n *Node) {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, n.Type, n)
|
|
|
|
|
Thearch.Gmove(n, &n1)
|
|
|
|
|
Thearch.Ginscon(as, c, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, n)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
}
|
|
|
|
|
gins = func(as int, f, t *Node) *obj.Prog {
|
|
|
|
|
var n1 Node
|
|
|
|
|
Regalloc(&n1, t.Type, t)
|
|
|
|
|
Thearch.Gmove(t, &n1)
|
|
|
|
|
Thearch.Gins(as, f, &n1)
|
|
|
|
|
Thearch.Gmove(&n1, t)
|
|
|
|
|
Regfree(&n1)
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
panics := make([]*obj.Prog, 0, 6) // 3 loads + 3 checks
|
|
|
|
|
|
|
|
|
|
loadlen := func() {
|
|
|
|
|
if xlen.Op != 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
|
|
|
|
|
Nodconst(&xlen, indexRegType, n.Left.Type.Type.Bound)
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if n.Op == OSLICESTR && Isconst(n.Left, CTSTR) {
|
2015-05-27 00:47:05 -04:00
|
|
|
Nodconst(&xlen, indexRegType, int64(len(n.Left.Val().U.(string))))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
regalloc(&xlen, indexRegType, nil)
|
|
|
|
|
x.Xoffset += int64(Widthptr)
|
|
|
|
|
x.Type = Types[TUINT]
|
|
|
|
|
Thearch.Gmove(&x, &xlen)
|
|
|
|
|
x.Xoffset -= int64(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
loadcap := func() {
|
|
|
|
|
if xcap.Op != 0 {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if n.Op == OSLICEARR || n.Op == OSLICE3ARR || n.Op == OSLICESTR {
|
|
|
|
|
loadlen()
|
|
|
|
|
xcap = xlen
|
|
|
|
|
if xcap.Op == OREGISTER {
|
|
|
|
|
Regrealloc(&xcap)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
regalloc(&xcap, indexRegType, nil)
|
|
|
|
|
x.Xoffset += 2 * int64(Widthptr)
|
|
|
|
|
x.Type = Types[TUINT]
|
|
|
|
|
Thearch.Gmove(&x, &xcap)
|
|
|
|
|
x.Xoffset -= 2 * int64(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
var x1, x2, x3 *Node // unevaluated index arguments
|
|
|
|
|
x1 = n.Right.Left
|
|
|
|
|
switch n.Op {
|
|
|
|
|
default:
|
|
|
|
|
x2 = n.Right.Right
|
|
|
|
|
case OSLICE3, OSLICE3ARR:
|
|
|
|
|
x2 = n.Right.Right.Left
|
|
|
|
|
x3 = n.Right.Right.Right
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// load computes src into targ, but if src refers to the len or cap of n.Left,
|
|
|
|
|
// load copies those from xlen, xcap, loading xlen if needed.
|
|
|
|
|
// If targ.Op == OREGISTER on return, it must be Regfreed,
|
|
|
|
|
// but it should not be modified without first checking whether it is
|
|
|
|
|
// xlen or xcap's register.
|
|
|
|
|
load := func(src, targ *Node) {
|
|
|
|
|
if src == nil {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
switch src.Op {
|
|
|
|
|
case OLITERAL:
|
|
|
|
|
*targ = *src
|
|
|
|
|
return
|
|
|
|
|
case OLEN:
|
|
|
|
|
// NOTE(rsc): This doesn't actually trigger, because order.go
|
|
|
|
|
// has pulled all the len and cap calls into separate assignments
|
|
|
|
|
// to temporaries. There are tests in test/sliceopt.go that could
|
|
|
|
|
// be enabled if this is fixed.
|
|
|
|
|
if samesafeexpr(n.Left, src.Left) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: reuse len")
|
|
|
|
|
}
|
|
|
|
|
loadlen()
|
|
|
|
|
*targ = xlen
|
|
|
|
|
if targ.Op == OREGISTER {
|
|
|
|
|
Regrealloc(targ)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
case OCAP:
|
|
|
|
|
// NOTE(rsc): This doesn't actually trigger; see note in case OLEN above.
|
|
|
|
|
if samesafeexpr(n.Left, src.Left) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: reuse cap")
|
|
|
|
|
}
|
|
|
|
|
loadcap()
|
|
|
|
|
*targ = xcap
|
|
|
|
|
if targ.Op == OREGISTER {
|
|
|
|
|
Regrealloc(targ)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if i.Op != 0 && samesafeexpr(x1, src) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: reuse 1st index")
|
|
|
|
|
}
|
|
|
|
|
*targ = i
|
|
|
|
|
if targ.Op == OREGISTER {
|
|
|
|
|
Regrealloc(targ)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if j.Op != 0 && samesafeexpr(x2, src) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: reuse 2nd index")
|
|
|
|
|
}
|
|
|
|
|
*targ = j
|
|
|
|
|
if targ.Op == OREGISTER {
|
|
|
|
|
Regrealloc(targ)
|
|
|
|
|
}
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
if Thearch.Cgenindex != nil {
|
|
|
|
|
regalloc(targ, indexRegType, nil)
|
|
|
|
|
p := Thearch.Cgenindex(src, targ, false)
|
|
|
|
|
if p != nil {
|
|
|
|
|
panics = append(panics, p)
|
|
|
|
|
}
|
|
|
|
|
} else if Thearch.Igenindex != nil {
|
|
|
|
|
p := Thearch.Igenindex(src, targ, false)
|
|
|
|
|
if p != nil {
|
|
|
|
|
panics = append(panics, p)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
regalloc(targ, indexRegType, nil)
|
|
|
|
|
var tmp Node
|
|
|
|
|
Cgenr(src, &tmp, targ)
|
|
|
|
|
Thearch.Gmove(&tmp, targ)
|
|
|
|
|
Regfree(&tmp)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
load(x1, &i)
|
|
|
|
|
load(x2, &j)
|
|
|
|
|
load(x3, &k)
|
|
|
|
|
|
|
|
|
|
// i defaults to 0.
|
|
|
|
|
if i.Op == 0 {
|
|
|
|
|
Nodconst(&i, indexRegType, 0)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// j defaults to len(x)
|
|
|
|
|
if j.Op == 0 {
|
|
|
|
|
loadlen()
|
|
|
|
|
j = xlen
|
|
|
|
|
if j.Op == OREGISTER {
|
|
|
|
|
Regrealloc(&j)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// k defaults to cap(x)
|
|
|
|
|
// Only need to load it if we're recalculating cap or doing a full update.
|
|
|
|
|
if k.Op == 0 && n.Op != OSLICESTR && (!iszero(&i) || needFullUpdate) {
|
|
|
|
|
loadcap()
|
|
|
|
|
k = xcap
|
|
|
|
|
if k.Op == OREGISTER {
|
|
|
|
|
Regrealloc(&k)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Check constant indexes for negative values, and against constant length if known.
|
|
|
|
|
// The func obvious below checks for out-of-order constant indexes.
|
|
|
|
|
var bound int64 = -1
|
|
|
|
|
if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
|
|
|
|
|
bound = n.Left.Type.Type.Bound
|
|
|
|
|
} else if n.Op == OSLICESTR && Isconst(n.Left, CTSTR) {
|
2015-05-27 00:47:05 -04:00
|
|
|
bound = int64(len(n.Left.Val().U.(string)))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
|
|
|
|
if Isconst(&i, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
if mpcmpfixc(i.Val().U.(*Mpint), 0) < 0 || bound >= 0 && mpcmpfixc(i.Val().U.(*Mpint), bound) > 0 {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
Yyerror("slice index out of bounds")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if Isconst(&j, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
if mpcmpfixc(j.Val().U.(*Mpint), 0) < 0 || bound >= 0 && mpcmpfixc(j.Val().U.(*Mpint), bound) > 0 {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
Yyerror("slice index out of bounds")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if Isconst(&k, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
if mpcmpfixc(k.Val().U.(*Mpint), 0) < 0 || bound >= 0 && mpcmpfixc(k.Val().U.(*Mpint), bound) > 0 {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
Yyerror("slice index out of bounds")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// same reports whether n1 and n2 are the same register, named variable, or constant.
|
|
|
|
|
same := func(n1, n2 *Node) bool {
|
|
|
|
|
return n1.Op == OREGISTER && n2.Op == OREGISTER && n1.Reg == n2.Reg ||
|
|
|
|
|
n1.Op == ONAME && n2.Op == ONAME && n1.Orig == n2.Orig && n1.Type == n2.Type && n1.Xoffset == n2.Xoffset ||
|
2015-05-27 00:47:05 -04:00
|
|
|
n1.Op == OLITERAL && n2.Op == OLITERAL && Mpcmpfixfix(n1.Val().U.(*Mpint), n2.Val().U.(*Mpint)) == 0
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// obvious reports whether n1 <= n2 is obviously true,
|
|
|
|
|
// and it calls Yyerror (still returning true) if n1 <= n2 is obviously false.
|
|
|
|
|
obvious := func(n1, n2 *Node) bool {
|
|
|
|
|
if Debug['B'] != 0 { // -B disables bounds checks
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
if same(n1, n2) {
|
|
|
|
|
return true // n1 == n2
|
|
|
|
|
}
|
|
|
|
|
if iszero(n1) {
|
|
|
|
|
return true // using unsigned compare, so 0 <= n2 always true
|
|
|
|
|
}
|
|
|
|
|
if xlen.Op != 0 && same(n1, &xlen) && xcap.Op != 0 && same(n2, &xcap) {
|
|
|
|
|
return true // len(x) <= cap(x) always true
|
|
|
|
|
}
|
|
|
|
|
if Isconst(n1, CTINT) && Isconst(n2, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
if Mpcmpfixfix(n1.Val().U.(*Mpint), n2.Val().U.(*Mpint)) <= 0 {
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
return true // n1, n2 constants such that n1 <= n2
|
|
|
|
|
}
|
|
|
|
|
Yyerror("slice index out of bounds")
|
|
|
|
|
return true
|
|
|
|
|
}
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
compare := func(n1, n2 *Node) {
|
2015-05-19 03:39:30 -04:00
|
|
|
// n1 might be a 64-bit constant, even on 32-bit architectures,
|
|
|
|
|
// but it will be represented in 32 bits.
|
|
|
|
|
if Ctxt.Arch.Regsize == 4 && Is64(n1.Type) {
|
2015-05-27 00:47:05 -04:00
|
|
|
if mpcmpfixc(n1.Val().U.(*Mpint), 1<<31) >= 0 {
|
2015-08-30 23:10:03 +02:00
|
|
|
Fatalf("missed slice out of bounds check")
|
2015-05-19 03:39:30 -04:00
|
|
|
}
|
|
|
|
|
var tmp Node
|
2015-05-27 00:47:05 -04:00
|
|
|
Nodconst(&tmp, indexRegType, Mpgetfix(n1.Val().U.(*Mpint)))
|
2015-05-19 03:39:30 -04:00
|
|
|
n1 = &tmp
|
|
|
|
|
}
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
p := Thearch.Ginscmp(OGT, indexRegType, n1, n2, -1)
|
|
|
|
|
panics = append(panics, p)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
loadcap()
|
|
|
|
|
max := &xcap
|
|
|
|
|
if k.Op != 0 && (n.Op == OSLICE3 || n.Op == OSLICE3ARR) {
|
|
|
|
|
if obvious(&k, max) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: omit check for 3rd index")
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
compare(&k, max)
|
|
|
|
|
}
|
|
|
|
|
max = &k
|
|
|
|
|
}
|
|
|
|
|
if j.Op != 0 {
|
|
|
|
|
if obvious(&j, max) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: omit check for 2nd index")
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
compare(&j, max)
|
|
|
|
|
}
|
|
|
|
|
max = &j
|
|
|
|
|
}
|
|
|
|
|
if i.Op != 0 {
|
|
|
|
|
if obvious(&i, max) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: omit check for 1st index")
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
compare(&i, max)
|
|
|
|
|
}
|
|
|
|
|
max = &i
|
|
|
|
|
}
|
|
|
|
|
if k.Op != 0 && i.Op != 0 {
|
|
|
|
|
obvious(&i, &k) // emit compile-time error for x[3:n:2]
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if len(panics) > 0 {
|
|
|
|
|
p := Gbranch(obj.AJMP, nil, 0)
|
|
|
|
|
for _, q := range panics {
|
|
|
|
|
Patch(q, Pc)
|
|
|
|
|
}
|
|
|
|
|
Ginscall(panicslice, -1)
|
|
|
|
|
Patch(p, Pc)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Checks are done.
|
|
|
|
|
// Compute new len as j-i, cap as k-i.
|
|
|
|
|
// If i and j are same register, len is constant 0.
|
|
|
|
|
// If i and k are same register, cap is constant 0.
|
|
|
|
|
// If j and k are same register, len and cap are same.
|
|
|
|
|
|
|
|
|
|
// Done with xlen and xcap.
|
|
|
|
|
// Now safe to modify j and k even if they alias xlen, xcap.
|
|
|
|
|
if xlen.Op == OREGISTER {
|
|
|
|
|
Regfree(&xlen)
|
|
|
|
|
}
|
|
|
|
|
if xcap.Op == OREGISTER {
|
|
|
|
|
Regfree(&xcap)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// are j and k the same value?
|
|
|
|
|
sameJK := same(&j, &k)
|
|
|
|
|
|
|
|
|
|
if i.Op != 0 {
|
|
|
|
|
// j -= i
|
|
|
|
|
if same(&i, &j) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: result len == 0")
|
|
|
|
|
}
|
|
|
|
|
if j.Op == OREGISTER {
|
|
|
|
|
Regfree(&j)
|
|
|
|
|
}
|
|
|
|
|
Nodconst(&j, indexRegType, 0)
|
|
|
|
|
} else {
|
|
|
|
|
switch j.Op {
|
|
|
|
|
case OLITERAL:
|
|
|
|
|
if Isconst(&i, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
Nodconst(&j, indexRegType, Mpgetfix(j.Val().U.(*Mpint))-Mpgetfix(i.Val().U.(*Mpint)))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if Debug_slice > 0 {
|
2015-05-27 00:47:05 -04:00
|
|
|
Warn("slice: result len == %d", Mpgetfix(j.Val().U.(*Mpint)))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case ONAME:
|
|
|
|
|
if !istemp(&j) {
|
|
|
|
|
var r Node
|
|
|
|
|
regalloc(&r, indexRegType, nil)
|
|
|
|
|
Thearch.Gmove(&j, &r)
|
|
|
|
|
j = r
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case OREGISTER:
|
|
|
|
|
if i.Op == OLITERAL {
|
2015-05-27 00:47:05 -04:00
|
|
|
v := Mpgetfix(i.Val().U.(*Mpint))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if v != 0 {
|
|
|
|
|
ginscon(Thearch.Optoas(OSUB, indexRegType), v, &j)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
gins(Thearch.Optoas(OSUB, indexRegType), &i, &j)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// k -= i if k is different from j and cap is needed.
|
|
|
|
|
// (The modifications to j above cannot affect i: if j and i were aliased,
|
|
|
|
|
// we replace j with a constant 0 instead of doing a subtraction,
|
|
|
|
|
// leaving i unmodified.)
|
|
|
|
|
if k.Op == 0 {
|
|
|
|
|
if Debug_slice > 0 && n.Op != OSLICESTR {
|
|
|
|
|
Warn("slice: result cap not computed")
|
|
|
|
|
}
|
|
|
|
|
// no need
|
|
|
|
|
} else if same(&i, &k) {
|
|
|
|
|
if k.Op == OREGISTER {
|
|
|
|
|
Regfree(&k)
|
|
|
|
|
}
|
|
|
|
|
Nodconst(&k, indexRegType, 0)
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: result cap == 0")
|
|
|
|
|
}
|
|
|
|
|
} else if sameJK {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: result cap == result len")
|
|
|
|
|
}
|
|
|
|
|
// k and j were the same value; make k-i the same as j-i.
|
|
|
|
|
if k.Op == OREGISTER {
|
|
|
|
|
Regfree(&k)
|
|
|
|
|
}
|
|
|
|
|
k = j
|
|
|
|
|
if k.Op == OREGISTER {
|
|
|
|
|
Regrealloc(&k)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
switch k.Op {
|
|
|
|
|
case OLITERAL:
|
|
|
|
|
if Isconst(&i, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
Nodconst(&k, indexRegType, Mpgetfix(k.Val().U.(*Mpint))-Mpgetfix(i.Val().U.(*Mpint)))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if Debug_slice > 0 {
|
2015-05-27 00:47:05 -04:00
|
|
|
Warn("slice: result cap == %d", Mpgetfix(k.Val().U.(*Mpint)))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
}
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case ONAME:
|
|
|
|
|
if !istemp(&k) {
|
|
|
|
|
var r Node
|
|
|
|
|
regalloc(&r, indexRegType, nil)
|
|
|
|
|
Thearch.Gmove(&k, &r)
|
|
|
|
|
k = r
|
|
|
|
|
}
|
|
|
|
|
fallthrough
|
|
|
|
|
case OREGISTER:
|
|
|
|
|
if same(&i, &k) {
|
|
|
|
|
Regfree(&k)
|
|
|
|
|
Nodconst(&k, indexRegType, 0)
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: result cap == 0")
|
|
|
|
|
}
|
|
|
|
|
} else if i.Op == OLITERAL {
|
2015-05-27 00:47:05 -04:00
|
|
|
v := Mpgetfix(i.Val().U.(*Mpint))
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
if v != 0 {
|
|
|
|
|
ginscon(Thearch.Optoas(OSUB, indexRegType), v, &k)
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
gins(Thearch.Optoas(OSUB, indexRegType), &i, &k)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
adjustBase := true
|
|
|
|
|
if i.Op == 0 || iszero(&i) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
Warn("slice: skip base adjustment for 1st index 0")
|
|
|
|
|
}
|
|
|
|
|
adjustBase = false
|
|
|
|
|
} else if k.Op != 0 && iszero(&k) || k.Op == 0 && iszero(&j) {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
if n.Op == OSLICESTR {
|
|
|
|
|
Warn("slice: skip base adjustment for string len == 0")
|
|
|
|
|
} else {
|
|
|
|
|
Warn("slice: skip base adjustment for cap == 0")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
adjustBase = false
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if !adjustBase && !needFullUpdate {
|
|
|
|
|
if Debug_slice > 0 {
|
|
|
|
|
if k.Op != 0 {
|
|
|
|
|
Warn("slice: len/cap-only update")
|
|
|
|
|
} else {
|
|
|
|
|
Warn("slice: len-only update")
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if i.Op == OREGISTER {
|
|
|
|
|
Regfree(&i)
|
|
|
|
|
}
|
|
|
|
|
// Write len (and cap if needed) back to x.
|
|
|
|
|
x.Xoffset += int64(Widthptr)
|
|
|
|
|
x.Type = Types[TUINT]
|
|
|
|
|
Thearch.Gmove(&j, &x)
|
|
|
|
|
x.Xoffset -= int64(Widthptr)
|
|
|
|
|
if k.Op != 0 {
|
|
|
|
|
x.Xoffset += 2 * int64(Widthptr)
|
|
|
|
|
x.Type = Types[TUINT]
|
|
|
|
|
Thearch.Gmove(&k, &x)
|
|
|
|
|
x.Xoffset -= 2 * int64(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
Regfree(&x)
|
|
|
|
|
} else {
|
|
|
|
|
// Compute new base. May smash i.
|
|
|
|
|
if n.Op == OSLICEARR || n.Op == OSLICE3ARR {
|
|
|
|
|
Cgenr(n.Left, &xbase, nil)
|
|
|
|
|
Cgen_checknil(&xbase)
|
|
|
|
|
} else {
|
|
|
|
|
regalloc(&xbase, Ptrto(res.Type.Type), nil)
|
|
|
|
|
x.Type = xbase.Type
|
|
|
|
|
Thearch.Gmove(&x, &xbase)
|
|
|
|
|
Regfree(&x)
|
|
|
|
|
}
|
|
|
|
|
if i.Op != 0 && adjustBase {
|
|
|
|
|
// Branch around the base adjustment if the resulting cap will be 0.
|
|
|
|
|
var p *obj.Prog
|
|
|
|
|
size := &k
|
|
|
|
|
if k.Op == 0 {
|
|
|
|
|
size = &j
|
|
|
|
|
}
|
|
|
|
|
if Isconst(size, CTINT) {
|
|
|
|
|
// zero was checked above, must be non-zero.
|
|
|
|
|
} else {
|
|
|
|
|
var tmp Node
|
|
|
|
|
Nodconst(&tmp, indexRegType, 0)
|
|
|
|
|
p = Thearch.Ginscmp(OEQ, indexRegType, size, &tmp, -1)
|
|
|
|
|
}
|
|
|
|
|
var w int64
|
|
|
|
|
if n.Op == OSLICESTR {
|
|
|
|
|
w = 1 // res is string, elem size is 1 (byte)
|
|
|
|
|
} else {
|
|
|
|
|
w = res.Type.Type.Width // res is []T, elem size is T.width
|
|
|
|
|
}
|
|
|
|
|
if Isconst(&i, CTINT) {
|
2015-05-27 00:47:05 -04:00
|
|
|
ginscon(Thearch.Optoas(OADD, xbase.Type), Mpgetfix(i.Val().U.(*Mpint))*w, &xbase)
|
cmd/internal/gc: optimize slice + write barrier
The code generated for a slice x[i:j] or x[i:j:k] computes the entire
new slice (base, len, cap) and then uses it as the evaluation of the
slice expression.
If the slice is part of an update x = x[i:j] or x = x[i:j:k], there are
opportunities to avoid computing some of these fields.
For x = x[0:i], we know that only the len is changing;
base can be ignored completely, and cap can be left unmodified.
For x = x[0:i:j], we know that only len and cap are changing;
base can be ignored completely.
For x = x[i:i], we know that the resulting cap is zero, and we don't
adjust the base during a slice producing a zero-cap result,
so again base can be ignored completely.
No write to base, no write barrier.
The old slice code was trying to work at a Go syntax level, mainly
because that was how you wrote code just once instead of once
per architecture. Now the compiler is factored a bit better and we
can implement slice during code generation but still have one copy
of the code. So the new code is working at that lower level.
(It must, to update only parts of the result.)
This CL by itself:
name old mean new mean delta
BinaryTree17 5.81s × (0.98,1.03) 5.71s × (0.96,1.05) ~ (p=0.101)
Fannkuch11 4.35s × (1.00,1.00) 4.39s × (1.00,1.00) +0.79% (p=0.000)
FmtFprintfEmpty 86.0ns × (0.94,1.11) 82.6ns × (0.98,1.04) -3.86% (p=0.048)
FmtFprintfString 276ns × (0.98,1.04) 273ns × (0.98,1.02) ~ (p=0.235)
FmtFprintfInt 274ns × (0.98,1.06) 270ns × (0.99,1.01) ~ (p=0.119)
FmtFprintfIntInt 506ns × (0.99,1.01) 475ns × (0.99,1.01) -6.02% (p=0.000)
FmtFprintfPrefixedInt 391ns × (0.99,1.01) 393ns × (1.00,1.01) ~ (p=0.139)
FmtFprintfFloat 566ns × (0.99,1.01) 574ns × (1.00,1.01) +1.33% (p=0.001)
FmtManyArgs 1.91µs × (0.99,1.01) 1.87µs × (0.99,1.02) -1.83% (p=0.000)
GobDecode 15.3ms × (0.99,1.02) 15.0ms × (0.98,1.05) -1.84% (p=0.042)
GobEncode 11.5ms × (0.97,1.03) 11.4ms × (0.99,1.03) ~ (p=0.152)
Gzip 645ms × (0.99,1.01) 647ms × (0.99,1.01) ~ (p=0.265)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.90% (p=0.000)
HTTPClientServer 90.5µs × (0.97,1.04) 88.5µs × (0.99,1.03) -2.27% (p=0.014)
JSONEncode 32.0ms × (0.98,1.03) 29.6ms × (0.98,1.01) -7.51% (p=0.000)
JSONDecode 114ms × (0.99,1.01) 104ms × (1.00,1.01) -8.60% (p=0.000)
Mandelbrot200 6.04ms × (1.00,1.01) 6.02ms × (1.00,1.00) ~ (p=0.057)
GoParse 6.47ms × (0.97,1.05) 6.37ms × (0.97,1.04) ~ (p=0.105)
RegexpMatchEasy0_32 171ns × (0.93,1.07) 152ns × (0.99,1.01) -11.09% (p=0.000)
RegexpMatchEasy0_1K 550ns × (0.98,1.01) 530ns × (1.00,1.00) -3.78% (p=0.000)
RegexpMatchEasy1_32 135ns × (0.99,1.02) 134ns × (0.99,1.01) -1.33% (p=0.002)
RegexpMatchEasy1_1K 879ns × (1.00,1.01) 865ns × (1.00,1.00) -1.58% (p=0.000)
RegexpMatchMedium_32 243ns × (1.00,1.00) 233ns × (1.00,1.00) -4.30% (p=0.000)
RegexpMatchMedium_1K 70.3µs × (1.00,1.00) 69.5µs × (1.00,1.00) -1.13% (p=0.000)
RegexpMatchHard_32 3.82µs × (1.00,1.01) 3.74µs × (1.00,1.00) -1.95% (p=0.000)
RegexpMatchHard_1K 117µs × (1.00,1.00) 115µs × (1.00,1.00) -1.69% (p=0.000)
Revcomp 917ms × (0.97,1.04) 920ms × (0.97,1.04) ~ (p=0.786)
Template 114ms × (0.99,1.01) 117ms × (0.99,1.01) +2.58% (p=0.000)
TimeParse 622ns × (0.99,1.01) 615ns × (0.99,1.00) -1.06% (p=0.000)
TimeFormat 665ns × (0.99,1.01) 654ns × (0.99,1.00) -1.70% (p=0.000)
This CL and previous CL (append) combined:
name old mean new mean delta
BinaryTree17 5.68s × (0.97,1.04) 5.71s × (0.96,1.05) ~ (p=0.638)
Fannkuch11 4.41s × (0.98,1.03) 4.39s × (1.00,1.00) ~ (p=0.474)
FmtFprintfEmpty 92.7ns × (0.91,1.16) 82.6ns × (0.98,1.04) -10.89% (p=0.004)
FmtFprintfString 281ns × (0.96,1.08) 273ns × (0.98,1.02) ~ (p=0.078)
FmtFprintfInt 288ns × (0.97,1.06) 270ns × (0.99,1.01) -6.37% (p=0.000)
FmtFprintfIntInt 493ns × (0.97,1.04) 475ns × (0.99,1.01) -3.53% (p=0.002)
FmtFprintfPrefixedInt 423ns × (0.97,1.04) 393ns × (1.00,1.01) -7.07% (p=0.000)
FmtFprintfFloat 598ns × (0.99,1.01) 574ns × (1.00,1.01) -4.02% (p=0.000)
FmtManyArgs 1.89µs × (0.98,1.05) 1.87µs × (0.99,1.02) ~ (p=0.305)
GobDecode 14.8ms × (0.98,1.03) 15.0ms × (0.98,1.05) ~ (p=0.237)
GobEncode 12.3ms × (0.98,1.01) 11.4ms × (0.99,1.03) -6.95% (p=0.000)
Gzip 656ms × (0.99,1.05) 647ms × (0.99,1.01) ~ (p=0.101)
Gunzip 142ms × (1.00,1.00) 143ms × (1.00,1.01) +0.58% (p=0.001)
HTTPClientServer 91.2µs × (0.97,1.04) 88.5µs × (0.99,1.03) -3.02% (p=0.003)
JSONEncode 32.6ms × (0.97,1.08) 29.6ms × (0.98,1.01) -9.10% (p=0.000)
JSONDecode 114ms × (0.97,1.05) 104ms × (1.00,1.01) -8.74% (p=0.000)
Mandelbrot200 6.11ms × (0.98,1.04) 6.02ms × (1.00,1.00) ~ (p=0.090)
GoParse 6.66ms × (0.97,1.04) 6.37ms × (0.97,1.04) -4.41% (p=0.000)
RegexpMatchEasy0_32 159ns × (0.99,1.00) 152ns × (0.99,1.01) -4.69% (p=0.000)
RegexpMatchEasy0_1K 538ns × (1.00,1.01) 530ns × (1.00,1.00) -1.57% (p=0.000)
RegexpMatchEasy1_32 138ns × (1.00,1.00) 134ns × (0.99,1.01) -2.91% (p=0.000)
RegexpMatchEasy1_1K 869ns × (0.99,1.01) 865ns × (1.00,1.00) -0.51% (p=0.012)
RegexpMatchMedium_32 252ns × (0.99,1.01) 233ns × (1.00,1.00) -7.85% (p=0.000)
RegexpMatchMedium_1K 72.7µs × (1.00,1.00) 69.5µs × (1.00,1.00) -4.43% (p=0.000)
RegexpMatchHard_32 3.85µs × (1.00,1.00) 3.74µs × (1.00,1.00) -2.74% (p=0.000)
RegexpMatchHard_1K 118µs × (1.00,1.00) 115µs × (1.00,1.00) -2.24% (p=0.000)
Revcomp 920ms × (0.97,1.07) 920ms × (0.97,1.04) ~ (p=0.998)
Template 129ms × (0.98,1.03) 117ms × (0.99,1.01) -9.79% (p=0.000)
TimeParse 619ns × (0.99,1.01) 615ns × (0.99,1.00) -0.57% (p=0.011)
TimeFormat 661ns × (0.98,1.04) 654ns × (0.99,1.00) ~ (p=0.223)
Change-Id: If054d81ab2c71d8d62cf54b5b1fac2af66b387fc
Reviewed-on: https://go-review.googlesource.com/9813
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2015-05-06 12:35:53 -04:00
|
|
|
} else if Thearch.AddIndex != nil && Thearch.AddIndex(&i, w, &xbase) {
|
|
|
|
|
// done by back end
|
|
|
|
|
} else if w == 1 {
|
|
|
|
|
gins(Thearch.Optoas(OADD, xbase.Type), &i, &xbase)
|
|
|
|
|
} else {
|
|
|
|
|
if i.Op == ONAME && !istemp(&i) {
|
|
|
|
|
var tmp Node
|
|
|
|
|
Tempname(&tmp, i.Type)
|
|
|
|
|
Thearch.Gmove(&i, &tmp)
|
|
|
|
|
i = tmp
|
|
|
|
|
}
|
|
|
|
|
ginscon(Thearch.Optoas(OMUL, i.Type), w, &i)
|
|
|
|
|
gins(Thearch.Optoas(OADD, xbase.Type), &i, &xbase)
|
|
|
|
|
}
|
|
|
|
|
if p != nil {
|
|
|
|
|
Patch(p, Pc)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if i.Op == OREGISTER {
|
|
|
|
|
Regfree(&i)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Write len, cap, base to result.
|
|
|
|
|
if res.Op == ONAME {
|
|
|
|
|
Gvardef(res)
|
|
|
|
|
}
|
|
|
|
|
Igen(res, &x, nil)
|
|
|
|
|
x.Xoffset += int64(Widthptr)
|
|
|
|
|
x.Type = Types[TUINT]
|
|
|
|
|
Thearch.Gmove(&j, &x)
|
|
|
|
|
x.Xoffset -= int64(Widthptr)
|
|
|
|
|
if k.Op != 0 {
|
|
|
|
|
x.Xoffset += 2 * int64(Widthptr)
|
|
|
|
|
Thearch.Gmove(&k, &x)
|
|
|
|
|
x.Xoffset -= 2 * int64(Widthptr)
|
|
|
|
|
}
|
|
|
|
|
x.Type = xbase.Type
|
|
|
|
|
cgen_wb(&xbase, &x, wb)
|
|
|
|
|
Regfree(&xbase)
|
|
|
|
|
Regfree(&x)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if j.Op == OREGISTER {
|
|
|
|
|
Regfree(&j)
|
|
|
|
|
}
|
|
|
|
|
if k.Op == OREGISTER {
|
|
|
|
|
Regfree(&k)
|
|
|
|
|
}
|
|
|
|
|
}
|