Jakub Ciolek
d524e1eccd
cmd/compile: on AMD64, turn x < 128 into x <= 127
...
x < 128 -> x <= 127
x >= 128 -> x > 127
This allows for shorter encoding as 127 fits into
a single-byte immediate.
archive/tar benchmark (Alder Lake 12600K)
name old time/op new time/op delta
/Writer/USTAR-16 1.46µs ± 0% 1.32µs ± 0% -9.43% (p=0.008 n=5+5)
/Writer/GNU-16 1.85µs ± 1% 1.79µs ± 1% -3.47% (p=0.008 n=5+5)
/Writer/PAX-16 3.21µs ± 0% 3.11µs ± 2% -2.96% (p=0.008 n=5+5)
/Reader/USTAR-16 1.38µs ± 1% 1.37µs ± 0% ~ (p=0.127 n=5+4)
/Reader/GNU-16 798ns ± 1% 800ns ± 2% ~ (p=0.548 n=5+5)
/Reader/PAX-16 3.07µs ± 1% 3.00µs ± 0% -2.35% (p=0.008 n=5+5)
[Geo mean] 1.76µs 1.70µs -3.15%
compilecmp:
hash/maphash
hash/maphash.(*Hash).Write 517 -> 510 (-1.35%)
runtime
runtime.traceReadCPU 1626 -> 1615 (-0.68%)
runtime [cmd/compile]
runtime.traceReadCPU 1626 -> 1615 (-0.68%)
math/rand/v2
type:.eq.[128]float32 65 -> 59 (-9.23%)
bytes
bytes.trimLeftUnicode 378 -> 373 (-1.32%)
bytes.IndexAny 1189 -> 1157 (-2.69%)
bytes.LastIndexAny 1256 -> 1239 (-1.35%)
bytes.lastIndexFunc 263 -> 261 (-0.76%)
strings
strings.FieldsFuncSeq.func1 411 -> 399 (-2.92%)
strings.EqualFold 625 -> 624 (-0.16%)
strings.trimLeftUnicode 248 -> 231 (-6.85%)
math/rand
type:.eq.[128]float32 65 -> 59 (-9.23%)
bytes [cmd/compile]
bytes.LastIndexAny 1256 -> 1239 (-1.35%)
bytes.lastIndexFunc 263 -> 261 (-0.76%)
bytes.trimLeftUnicode 378 -> 373 (-1.32%)
bytes.IndexAny 1189 -> 1157 (-2.69%)
regexp/syntax
regexp/syntax.(*parser).parseEscape 1113 -> 1102 (-0.99%)
math/rand/v2 [cmd/compile]
type:.eq.[128]float32 65 -> 59 (-9.23%)
strings [cmd/compile]
strings.EqualFold 625 -> 624 (-0.16%)
strings.FieldsFuncSeq.func1 411 -> 399 (-2.92%)
strings.trimLeftUnicode 248 -> 231 (-6.85%)
math/rand [cmd/compile]
type:.eq.[128]float32 65 -> 59 (-9.23%)
regexp
regexp.(*inputString).context 198 -> 197 (-0.51%)
regexp.(*inputBytes).context 221 -> 212 (-4.07%)
image/jpeg
image/jpeg.(*decoder).processDQT 500 -> 491 (-1.80%)
regexp/syntax [cmd/compile]
regexp/syntax.(*parser).parseEscape 1113 -> 1102 (-0.99%)
regexp [cmd/compile]
regexp.(*inputString).context 198 -> 197 (-0.51%)
regexp.(*inputBytes).context 221 -> 212 (-4.07%)
encoding/csv
encoding/csv.(*Writer).fieldNeedsQuotes 269 -> 266 (-1.12%)
cmd/vendor/golang.org/x/sys/unix
type:.eq.[131]struct 855 -> 823 (-3.74%)
vendor/golang.org/x/text/unicode/norm
vendor/golang.org/x/text/unicode/norm.nextDecomposed 4831 -> 4826 (-0.10%)
vendor/golang.org/x/text/unicode/norm.(*Iter).returnSlice 281 -> 275 (-2.14%)
vendor/golang.org/x/text/secure/bidirule
vendor/golang.org/x/text/secure/bidirule.init.0 85 -> 83 (-2.35%)
go/scanner
go/scanner.isDigit 100 -> 98 (-2.00%)
go/scanner.(*Scanner).next 431 -> 422 (-2.09%)
go/scanner.isLetter 142 -> 124 (-12.68%)
encoding/asn1
encoding/asn1.parseTagAndLength 1189 -> 1182 (-0.59%)
encoding/asn1.makeField 3481 -> 3463 (-0.52%)
text/scanner
text/scanner.(*Scanner).next 1242 -> 1236 (-0.48%)
archive/tar
archive/tar.isASCII 133 -> 127 (-4.51%)
archive/tar.(*Writer).writeRawFile 1206 -> 1198 (-0.66%)
archive/tar.(*Reader).readHeader.func1 9 -> 7 (-22.22%)
archive/tar.toASCII 393 -> 383 (-2.54%)
archive/tar.splitUSTARPath 405 -> 396 (-2.22%)
archive/tar.(*Writer).writePAXHeader.func1 627 -> 620 (-1.12%)
text/template
text/template.jsIsSpecial 59 -> 57 (-3.39%)
go/doc
go/doc.assumedPackageName 714 -> 701 (-1.82%)
vendor/golang.org/x/net/http/httpguts
vendor/golang.org/x/net/http/httpguts.headerValueContainsToken 965 -> 952 (-1.35%)
vendor/golang.org/x/net/http/httpguts.tokenEqual 280 -> 269 (-3.93%)
vendor/golang.org/x/net/http/httpguts.IsTokenRune 28 -> 26 (-7.14%)
net/mail
net/mail.isVchar 26 -> 24 (-7.69%)
net/mail.isAtext 106 -> 104 (-1.89%)
net/mail.(*Address).String 1084 -> 1052 (-2.95%)
net/mail.isQtext 39 -> 37 (-5.13%)
net/mail.isMultibyte 9 -> 7 (-22.22%)
net/mail.isDtext 45 -> 43 (-4.44%)
net/mail.(*addrParser).consumeQuotedString 1050 -> 1029 (-2.00%)
net/mail.quoteString 741 -> 714 (-3.64%)
cmd/internal/obj/s390x
cmd/internal/obj/s390x.preprocess 6405 -> 6393 (-0.19%)
cmd/internal/obj/x86
cmd/internal/obj/x86.toDisp8 303 -> 301 (-0.66%)
fmt [cmd/compile]
fmt.Fprintf 4726 -> 4662 (-1.35%)
go/scanner [cmd/compile]
go/scanner.(*Scanner).next 431 -> 422 (-2.09%)
go/scanner.isLetter 142 -> 124 (-12.68%)
go/scanner.isDigit 100 -> 98 (-2.00%)
cmd/compile/internal/syntax
cmd/compile/internal/syntax.(*source).nextch 879 -> 847 (-3.64%)
cmd/vendor/golang.org/x/mod/module
cmd/vendor/golang.org/x/mod/module.checkElem 1253 -> 1235 (-1.44%)
cmd/vendor/golang.org/x/mod/module.escapeString 519 -> 517 (-0.39%)
go/doc [cmd/compile]
go/doc.assumedPackageName 714 -> 701 (-1.82%)
cmd/compile/internal/syntax [cmd/compile]
cmd/compile/internal/syntax.(*scanner).escape 1965 -> 1933 (-1.63%)
cmd/compile/internal/syntax.(*scanner).next 8975 -> 8847 (-1.43%)
cmd/internal/obj/s390x [cmd/compile]
cmd/internal/obj/s390x.preprocess 6405 -> 6393 (-0.19%)
cmd/internal/obj/x86 [cmd/compile]
cmd/internal/obj/x86.toDisp8 303 -> 301 (-0.66%)
cmd/internal/gcprog
cmd/internal/gcprog.(*Writer).Repeat 688 -> 677 (-1.60%)
cmd/internal/gcprog.(*Writer).varint 442 -> 439 (-0.68%)
cmd/compile/internal/ir
cmd/compile/internal/ir.splitPkg 331 -> 325 (-1.81%)
cmd/compile/internal/ir [cmd/compile]
cmd/compile/internal/ir.splitPkg 331 -> 325 (-1.81%)
net/http
net/http.containsDotDot.FieldsFuncSeq.func1 411 -> 399 (-2.92%)
net/http.isNotToken 33 -> 30 (-9.09%)
net/http.containsDotDot 606 -> 588 (-2.97%)
net/http.isCookieNameValid 197 -> 191 (-3.05%)
net/http.parsePattern 4330 -> 4317 (-0.30%)
net/http.ParseCookie 1099 -> 1096 (-0.27%)
net/http.validMethod 197 -> 187 (-5.08%)
cmd/vendor/golang.org/x/text/unicode/norm
cmd/vendor/golang.org/x/text/unicode/norm.(*Iter).returnSlice 281 -> 275 (-2.14%)
cmd/vendor/golang.org/x/text/unicode/norm.nextDecomposed 4831 -> 4826 (-0.10%)
net/http/cookiejar
net/http/cookiejar.encode 1936 -> 1918 (-0.93%)
expvar
expvar.appendJSONQuote 972 -> 965 (-0.72%)
cmd/cgo/internal/test
cmd/cgo/internal/test.stack128 116 -> 114 (-1.72%)
cmd/vendor/rsc.io/markdown
cmd/vendor/rsc.io/markdown.newATXHeading 1637 -> 1628 (-0.55%)
cmd/vendor/rsc.io/markdown.isUnicodePunct 197 -> 179 (-9.14%)
Change-Id: I578bdf42ef229d687d526e378d697ced51e1880c
Reviewed-on: https://go-review.googlesource.com/c/go/+/639935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Keith Randall <khr@google.com>
2025-02-16 07:23:13 -08:00
khr@golang.org
7273509466
cmd/compile: add additional arm64 bit field rules
...
Get rid of TODO in prove pass.
We currently avoid marking shifts of constants as bounded, where
bounded means we don't have to worry about <0 or >=bitwidth shifts.
We do this because it causes different rule applications during lowering
which cause some codegen tests to fail.
Add some new rules which ensure that we get the right final instruction
sequence regardless of the ordering. Then we can remove this special case.
Change-Id: I4e962d4f09992b42ab47e123de5ded3b8b8fb205
Reviewed-on: https://go-review.googlesource.com/c/go/+/602935
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
2024-08-12 21:03:55 +00:00
khr@golang.org
3b96eebcbd
cmd/compile: rewrite the constant parts of the prove pass
...
Handles a lot more cases where constant ranges can eliminate
various (mostly bounds failure) paths.
Fixes #66826
Fixes #66692
Fixes #48213
Update #57959
TODO: remove constant logic from poset code, no longer needed.
Change-Id: Id196436fcd8a0c84c7d59c04f93bd92e26a0fd7e
Reviewed-on: https://go-review.googlesource.com/c/go/+/599096
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
2024-08-07 16:07:33 +00:00
khr@golang.org
1a0b86375f
cmd/compile: remove redundant calls to cmpstring
...
The results of cmpstring are reuseable if the second call has the
same arguments and memory.
Note that this gets rid of cmpstring, but we still generate a
redundant </<= test and branch afterwards, because the compiler
doesn't know that cmpstring only ever returns -1,0,1.
Update #61725
Change-Id: I93a0d1ccca50d90b1e1a888240ffb75a3b10b59b
Reviewed-on: https://go-review.googlesource.com/c/go/+/578835
Reviewed-by: David Chase <drchase@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
2024-04-19 16:31:02 +00:00
eric fang
ace1494d92
cmd/compile: optimize absorbing InvertFlags into Noov comparisons for arm64
...
Previously (LessThanNoov (InvertFlags x)) is lowered as:
CSET
CSET
BIC
With this CL it's lowered as:
CSET
CSEL
This saves one instruction.
Similarly (GreaterEqualNoov (InvertFlags x)) is now lowered as:
CSET
CSINC
$ benchstat old.bench new.bench
goos: linux
goarch: arm64
│ old.bench │ new.bench │
│ sec/op │ sec/op vs base │
InvertLessThanNoov-160 2.249n ± 2% 2.190n ± 1% -2.62% (p=0.003 n=10)
Change-Id: Idd8979b7f4fe466e74b1a201c4aba7f1b0cffb0b
Reviewed-on: https://go-review.googlesource.com/c/go/+/526237
Reviewed-by: Heschi Kreinick <heschi@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
2023-09-21 02:36:06 +00:00
Keith Randall
6b165577fe
cmd/compile: remove memequal call from string compares in more cases
...
Add more rules to ensure that order doesn't matter.
Add memequal 0 rule.
Try to use a constant argument to memequal when one is available.
Fixes #59684
Change-Id: I36e85ffbd949396ed700ed6e8ec2bc3ae013f5d2
Reviewed-on: https://go-review.googlesource.com/c/go/+/485535
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
2023-04-18 21:31:33 +00:00
ruinan
9be533a8ee
cmd/compile: get more bounds info from logic operators in prove pass
...
Currently, the prove pass can get knowledge from some specific logic
operators only before the CFG is explored, which means that the bounds
information of the branch will be ignored.
This CL updates the facts table by the logic operators in every
branch. Combined with the branch information, this will be helpful for
BCE in some circumstances.
Fixes #57243
Change-Id: I0bd164f1b47804ccfc37879abe9788740b016fd5
Reviewed-on: https://go-review.googlesource.com/c/go/+/419555
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
2023-04-07 10:09:11 +00:00
erifan01
42f99b203d
cmd/compile: optimize cmp to cmn under conditions < and >= on arm64
...
Under the right conditions we can optimize cmp comparisons to cmn
comparisons, such as:
func foo(a, b int) int {
var c int
if a + b < 0 {
c = 1
}
return c
}
Previously it's compiled as:
ADD R1, R0, R1
CMP $0, R1
CSET LT, R0
With this CL it's compiled as:
CMN R1, R0
CSET MI, R0
Here we need to pay attention to the overflow situation of a+b, the MI
flag means N==1, which doesn't honor the overflow flag V, its value
depends only on the sign of the result. So it has the same semantic of
the Go code, so it's correct.
Similarly, this CL also optimizes the case of >= comparison
using the PL conditional flag.
Change-Id: I47179faba5b30cca84ea69bafa2ad5241bf6dfba
Reviewed-on: https://go-review.googlesource.com/c/go/+/476116
Run-TryBot: Eric Fang <eric.fang@arm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: David Chase <drchase@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2023-03-24 01:19:09 +00:00
erifan01
91a2e921dd
cmd/compile: fix incorrect truncating when converting CMP to TST on arm64
...
CL 420434 optimized CMP into TST in some situations, but it has a bug,
these four rules are not correct:
(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y))
(LessEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y))
(GreaterThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y))
(GreaterEqual (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y))
But due to the existence of this rule
(LessThan (CMPWconst [0] x:(ANDconst [c] y))) && x.Uses == 1 =>
(LessThan (TSTWconst [int32(c)] y)), the above rules have never been
fired. This CL corrects them as:
(LessThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessThan (TSTconst [c] y))
(LessEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (LessEqual (TSTconst [c] y))
(GreaterThan (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterThan (TSTconst [c] y))
(GreaterEqual (CMPconst [0] x:(ANDconst [c] y))) && x.Uses == 1 => (GreaterEqual (TSTconst [c] y))
Change-Id: I7d60bcc9a266ee58388baeaab9f493b57cf1ad55
Reviewed-on: https://go-review.googlesource.com/c/go/+/473617
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
Run-TryBot: Eric Fang <eric.fang@arm.com>
2023-03-22 08:32:53 +00:00
Paul E. Murphy
0301c6c351
test/codegen: combine trivial PPC64 tests into ppc64x
...
Use a small python script to consolidate duplicate
ppc64/ppc64le tests into a single ppc64x codegen test.
This makes small assumption that anytime two tests with
for different arch/variant combos exists, those tests
can be combined into a single ppc64x test.
E.x:
// ppc64le: foo
// ppc64le/power9: foo
into
// ppc64x: foo
or
// ppc64: foo
// ppc64le: foo
into
// ppc64x: foo
import glob
import re
files = glob.glob("codegen/*.go")
for file in files:
with open(file) as f:
text = [l for l in f]
i = 0
while i < len(text):
first = re.match("\s*// ?ppc64(le)?(/power[89])?:(.*)", text[i])
if first:
j = i+1
while j < len(text):
second = re.match("\s*// ?ppc64(le)?(/power[89])?:(.*)", text[j])
if not second:
break
if (not first.group(2) or first.group(2) == second.group(2)) and first.group(3) == second.group(3):
text[i] = re.sub(" ?ppc64(le|x)?"," ppc64x",text[i])
text=text[:j] + (text[j+1:])
else:
j += 1
i+=1
with open(file, 'w') as f:
f.write("".join(text))
Change-Id: Ic6b009b54eacaadc5a23db9c5a3bf7331b595821
Reviewed-on: https://go-review.googlesource.com/c/go/+/463220
Reviewed-by: Cherry Mui <cherryyz@google.com>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Bryan Mills <bcmills@google.com>
Run-TryBot: Paul Murphy <murp@ibm.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
2023-01-27 18:24:12 +00:00
eric fang
ddc7d2a80c
cmd/compile: add late lower pass for last rules to run
...
Usually optimization rules have corresponding priorities, some need to
be run first, some run next, and some run last, which produces the best
code. But currently our optimization rules have no priority, this CL
adds a late lower pass that runs those rules that need to be run at last,
such as split unreasonable constant folding. This pass can be seen as
the second round of the lower pass.
For example:
func foo(a, b uint64) uint64 {
d := a+0x1234568
d1 := b+0x1234568
return d&d1
}
The code generated by the master branch:
0x0004 00004 ADD $19088744, R0, R2 // movz+movk+add
0x0010 00016 ADD $19088744, R1, R1 // movz+movk+add
0x001c 00028 AND R1, R2, R0
This is because the current constant folding optimization rules do not
take into account the range of constants, causing the constant to be
loaded repeatedly. This CL splits these unreasonable constants folding
in the late lower pass. With this CL the generated code:
0x0004 00004 MOVD $19088744, R2 // movz+movk
0x000c 00012 ADD R0, R2, R3
0x0010 00016 ADD R1, R2, R1
0x0014 00020 AND R1, R3, R0
This CL also adds constant folding optimization for ADDS instruction.
In addition, in order not to introduce the codegen regression, an
optimization rule is added to change the addition of a negative number
into a subtraction of a positive number.
go1 benchmarks:
name old time/op new time/op delta
BinaryTree17-8 1.22s ± 1% 1.24s ± 0% +1.56% (p=0.008 n=5+5)
Fannkuch11-8 1.54s ± 0% 1.53s ± 0% -0.69% (p=0.016 n=4+5)
FmtFprintfEmpty-8 14.1ns ± 0% 14.1ns ± 0% ~ (p=0.079 n=4+5)
FmtFprintfString-8 26.0ns ± 0% 26.1ns ± 0% +0.23% (p=0.008 n=5+5)
FmtFprintfInt-8 32.3ns ± 0% 32.9ns ± 1% +1.72% (p=0.008 n=5+5)
FmtFprintfIntInt-8 54.5ns ± 0% 55.5ns ± 0% +1.83% (p=0.008 n=5+5)
FmtFprintfPrefixedInt-8 61.5ns ± 0% 62.0ns ± 0% +0.93% (p=0.008 n=5+5)
FmtFprintfFloat-8 72.0ns ± 0% 73.6ns ± 0% +2.24% (p=0.008 n=5+5)
FmtManyArgs-8 221ns ± 0% 224ns ± 0% +1.22% (p=0.008 n=5+5)
GobDecode-8 1.91ms ± 0% 1.93ms ± 0% +0.98% (p=0.008 n=5+5)
GobEncode-8 1.40ms ± 1% 1.39ms ± 0% -0.79% (p=0.032 n=5+5)
Gzip-8 115ms ± 0% 117ms ± 1% +1.17% (p=0.008 n=5+5)
Gunzip-8 19.4ms ± 1% 19.3ms ± 0% -0.71% (p=0.016 n=5+4)
HTTPClientServer-8 27.0µs ± 0% 27.3µs ± 0% +0.80% (p=0.008 n=5+5)
JSONEncode-8 3.36ms ± 1% 3.33ms ± 0% ~ (p=0.056 n=5+5)
JSONDecode-8 17.5ms ± 2% 17.8ms ± 0% +1.71% (p=0.016 n=5+4)
Mandelbrot200-8 2.29ms ± 0% 2.29ms ± 0% ~ (p=0.151 n=5+5)
GoParse-8 1.35ms ± 1% 1.36ms ± 1% ~ (p=0.056 n=5+5)
RegexpMatchEasy0_32-8 24.5ns ± 0% 24.5ns ± 0% ~ (p=0.444 n=4+5)
RegexpMatchEasy0_1K-8 131ns ±11% 118ns ± 6% ~ (p=0.056 n=5+5)
RegexpMatchEasy1_32-8 22.9ns ± 0% 22.9ns ± 0% ~ (p=0.905 n=4+5)
RegexpMatchEasy1_1K-8 126ns ± 0% 127ns ± 0% ~ (p=0.063 n=4+5)
RegexpMatchMedium_32-8 486ns ± 5% 483ns ± 0% ~ (p=0.381 n=5+4)
RegexpMatchMedium_1K-8 15.4µs ± 1% 15.5µs ± 0% ~ (p=0.151 n=5+5)
RegexpMatchHard_32-8 687ns ± 0% 686ns ± 0% ~ (p=0.103 n=5+5)
RegexpMatchHard_1K-8 20.7µs ± 0% 20.7µs ± 1% ~ (p=0.151 n=5+5)
Revcomp-8 175ms ± 2% 176ms ± 3% ~ (p=1.000 n=5+5)
Template-8 20.4ms ± 6% 20.1ms ± 2% ~ (p=0.151 n=5+5)
TimeParse-8 112ns ± 0% 113ns ± 0% +0.97% (p=0.016 n=5+4)
TimeFormat-8 156ns ± 0% 145ns ± 0% -7.14% (p=0.029 n=4+4)
Change-Id: I3ced26e89041f873ac989586514ccc5ee09f13da
Reviewed-on: https://go-review.googlesource.com/c/go/+/425134
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
2022-10-05 02:40:56 +00:00
eric fang
cf53990b18
cmd/compile: Add some CMP and CMN optimization rules on arm64
...
This CL adds some optimizaion rules:
1, Converts CMP to CMN, or vice versa, when comparing with a negative
number.
2, For equal and not equal comparisons, CMP can be converted to CMN in
some cases. In theory we could do the same optimization for LT, LE, GT
and GE, but need to account for overflow, this CL doesn't handle them.
There are no noticeable performance changes.
Change-Id: Ia49266c019ab7908ebc9510c2f02e121b1607869
Reviewed-on: https://go-review.googlesource.com/c/go/+/429795
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Eric Fang <eric.fang@arm.com>
2022-09-20 01:14:40 +00:00
Derek Parker
6605686e3b
cmd/compile: new inline heuristic for struct compares
...
This CL changes the heuristic used to determine whether we can inline a
struct equality check or if we must generate a function and call that
function for equality.
The old method was to count struct fields, but this can lead to poor
in lining decisions. We should really be determining the cost of the
equality check and use that to determine if we should inline or generate
a function.
The new benchmark provided in this CL returns the following when compared
against tip:
```
name old time/op new time/op delta
EqStruct-32 2.46ns ± 4% 0.25ns ±10% -89.72% (p=0.000 n=39+39)
```
Fixes #38494
Change-Id: Ie06b80a2b2a03a3fd0978bcaf7715f9afb66e0ab
GitHub-Last-Rev: e9a18d9389
GitHub-Pull-Request: golang/go#53326
Reviewed-on: https://go-review.googlesource.com/c/go/+/411674
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Heschi Kreinick <heschi@google.com>
2022-09-02 17:48:22 +00:00
Matthew Dempsky
ab8d7dd75e
cmd/compile: set LocalPkg.Path to -p flag
...
Since CL 391014, cmd/compile now requires the -p flag to be set the
build system. This CL changes it to initialize LocalPkg.Path to the
provided path, rather than relying on writing out `"".` into object
files and expecting cmd/link to substitute them.
However, this actually involved a rather long tail of fixes. Many have
already been submitted, but a few notable ones that have to land
simultaneously with changing LocalPkg:
1. When compiling package runtime, there are really two "runtime"
packages: types.LocalPkg (the source package itself) and
ir.Pkgs.Runtime (the compiler's internal representation, for synthetic
references). Previously, these ended up creating separate link
symbols (`"".xxx` and `runtime.xxx`, respectively), but now they both
end up as `runtime.xxx`, which causes lsym collisions (notably
inittask and funcsyms).
2. test/codegen tests need to be updated to expect symbols to be named
`command-line-arguments.xxx` rather than `"".foo`.
3. The issue20014 test case is sensitive to the sort order of field
tracking symbols. In particular, the local package now sorts to its
natural place in the list, rather than to the front.
Thanks to David Chase for helping track down all of the fixes needed
for this CL.
Updates #51734 .
Change-Id: Iba3041cf7ad967d18c6e17922fa06ba11798b565
Reviewed-on: https://go-review.googlesource.com/c/go/+/393715
Reviewed-by: David Chase <drchase@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Cuong Manh Le <cuong.manhle.vn@gmail.com>
2022-05-16 18:19:47 +00:00
Jorropo
e1e056fa6a
cmd/compile: fold constants found by prove
...
It is hit ~70k times building go.
This make the go binary, 0.04% smaller.
I didn't included benchmarks because this is just constant foldings
and is hard to mesure objectively.
For example, this enable rewriting things like:
if x == 20 {
return x + 30 + z
}
Into:
if x == 20 {
return 50 + z
}
It's not just fixing programer's code,
the ssa generator generate code like this sometimes.
Change-Id: I0861f342b27f7227b5f1c34d8267fa0057b1bbbc
GitHub-Last-Rev: 4c2f9b5216
GitHub-Pull-Request: golang/go#52669
Reviewed-on: https://go-review.googlesource.com/c/go/+/403735
Reviewed-by: Keith Randall <khr@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2022-05-04 20:30:17 +00:00
Archana R
8b9c0d1a79
test/codegen: updated comparison test to verify on ppc64,ppc64le
...
Updated test/codegen/comparison.go to verify memequal is inlined
as implemented in CL 328291.
Change-Id: If7824aed37ee1f8640e54fda0f9b7610582ba316
Reviewed-on: https://go-review.googlesource.com/c/go/+/357289
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
2021-10-21 13:05:07 +00:00
Ruslan Andreev
810b08b8ec
cmd/compile: inline memequal(x, const, sz) for small sizes
...
This CL adds late expanded memequal(x, const, sz) inlining for 2, 4, 8
bytes size. This PoC is using the same method as CL 248404.
This optimization fires about 100 times in Go compiler (1675 occurrences
reduced to 1574, so -6%).
Also, added unit-tests to codegen/comparisions.go file.
Updates #37275
Change-Id: Ia52808d573cb706d1da8166c5746ede26f46c5da
Reviewed-on: https://go-review.googlesource.com/c/go/+/328291
Reviewed-by: Cherry Mui <cherryyz@google.com>
Run-TryBot: Cherry Mui <cherryyz@google.com>
Trust: David Chase <drchase@google.com>
2021-10-06 13:47:50 +00:00
Cherry Zhang
263e13d1f7
test: make codegen tests work with both ABIs
...
Some codegen tests were written with the assumption that
arguments and results are in memory, and with a specific stack
layout. With the register ABI, the assumption is no longer true.
Adjust the tests to work with both cases.
- For tests expecting in memory arguments/results, change to use
global variables or memory-assigned argument/results.
- Allow more registers. E.g. some tests expecting register names
contain only letters (e.g. AX), but it can also contain numbers
(e.g. R10).
- Some instruction selection changes when operate on register vs.
memory, e.g. ADDQ vs. LEAQ, MOVB vs. MOVL. Accept both.
TODO: mathbits.go and memops.go still need fix.
Change-Id: Ic5932b4b5dd3f5d30ed078d296476b641420c4c5
Reviewed-on: https://go-review.googlesource.com/c/go/+/309335
Trust: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: David Chase <drchase@google.com>
2021-04-12 21:59:59 +00:00
erifan01
600259b099
cmd/compile: use depth first topological sort algorithm for layout
...
The current layout algorithm tries to put consecutive blocks together,
so the priority of the successor block is higher than the priority of
the zero indegree block. This algorithm is beneficial for subsequent
register allocation, but will result in more branch instructions.
The depth-first topological sorting algorithm is a well-known layout
algorithm, which has applications in many languages, and it helps to
reduce branch instructions. This CL applies it to the layout pass.
The test results show that it helps to reduce the code size.
This CL also includes the following changes:
1, Removed the primary predecessor mechanism. The new layout algorithm is
not very friendly to register allocator in some cases, in order to adapt
to the new layout algorithm, a new primary predecessor selection strategy
is introduced.
2, Since the new layout implementation may place non-loop blocks between
loop blocks, some adaptive modifications have also been made to looprotate
pass.
3, The layout also affects the results of codegen, so this CL also adjusted
several codegen tests accordingly.
It is inevitable that this CL will cause the code size or performance of a
few functions to decrease, but the number of cases it improves is much larger
than the number of cases it drops.
Statistical data from compilecmp on linux/amd64 is as follow:
name old time/op new time/op delta
Template 382ms ± 4% 382ms ± 4% ~ (p=0.497 n=49+50)
Unicode 170ms ± 9% 169ms ± 8% ~ (p=0.344 n=48+50)
GoTypes 2.01s ± 4% 2.01s ± 4% ~ (p=0.628 n=50+48)
Compiler 190ms ±10% 189ms ± 9% ~ (p=0.734 n=50+50)
SSA 11.8s ± 2% 11.8s ± 3% ~ (p=0.877 n=50+50)
Flate 241ms ± 9% 241ms ± 8% ~ (p=0.897 n=50+49)
GoParser 366ms ± 3% 361ms ± 4% -1.21% (p=0.004 n=47+50)
Reflect 835ms ± 3% 838ms ± 3% ~ (p=0.275 n=50+49)
Tar 336ms ± 4% 335ms ± 3% ~ (p=0.454 n=48+48)
XML 433ms ± 4% 431ms ± 3% ~ (p=0.071 n=49+48)
LinkCompiler 706ms ± 4% 705ms ± 4% ~ (p=0.608 n=50+49)
ExternalLinkCompiler 1.85s ± 3% 1.83s ± 2% -1.47% (p=0.000 n=49+48)
LinkWithoutDebugCompiler 437ms ± 5% 437ms ± 6% ~ (p=0.953 n=49+50)
[Geo mean] 615ms 613ms -0.37%
name old alloc/op new alloc/op delta
Template 38.7MB ± 1% 38.7MB ± 1% ~ (p=0.834 n=50+50)
Unicode 28.1MB ± 0% 28.1MB ± 0% -0.22% (p=0.000 n=49+50)
GoTypes 168MB ± 1% 168MB ± 1% ~ (p=0.054 n=47+47)
Compiler 23.0MB ± 1% 23.0MB ± 1% ~ (p=0.432 n=50+50)
SSA 1.54GB ± 0% 1.54GB ± 0% +0.21% (p=0.000 n=50+50)
Flate 23.6MB ± 1% 23.6MB ± 1% ~ (p=0.153 n=43+46)
GoParser 35.1MB ± 1% 35.1MB ± 2% ~ (p=0.202 n=50+50)
Reflect 84.7MB ± 1% 84.7MB ± 1% ~ (p=0.333 n=48+49)
Tar 34.5MB ± 1% 34.5MB ± 1% ~ (p=0.406 n=46+49)
XML 44.3MB ± 2% 44.2MB ± 3% ~ (p=0.981 n=50+50)
LinkCompiler 131MB ± 0% 128MB ± 0% -2.74% (p=0.000 n=50+50)
ExternalLinkCompiler 120MB ± 0% 120MB ± 0% +0.01% (p=0.007 n=50+50)
LinkWithoutDebugCompiler 77.3MB ± 0% 77.3MB ± 0% -0.02% (p=0.000 n=50+50)
[Geo mean] 69.3MB 69.1MB -0.22%
file before after Δ %
addr2line 4104220 4043684 -60536 -1.475%
api 5342502 5249678 -92824 -1.737%
asm 4973785 4858257 -115528 -2.323%
buildid 2667844 2625660 -42184 -1.581%
cgo 4686849 4616313 -70536 -1.505%
compile 23667431 23268406 -399025 -1.686%
cover 4959676 4874108 -85568 -1.725%
dist 3515934 3450422 -65512 -1.863%
doc 3995581 3925469 -70112 -1.755%
fix 3379202 3318522 -60680 -1.796%
link 6743249 6629913 -113336 -1.681%
nm 4047529 3991777 -55752 -1.377%
objdump 4456151 4388151 -68000 -1.526%
pack 2435040 2398072 -36968 -1.518%
pprof 13804080 13565808 -238272 -1.726%
test2json 2690043 2645987 -44056 -1.638%
trace 10418492 10232716 -185776 -1.783%
vet 7258259 7121259 -137000 -1.888%
total 113145867 111204202 -1941665 -1.716%
The situation on linux/arm64 is as follow:
name old time/op new time/op delta
Template 280ms ± 1% 282ms ± 1% +0.75% (p=0.000 n=46+48)
Unicode 124ms ± 2% 124ms ± 2% +0.37% (p=0.045 n=50+50)
GoTypes 1.69s ± 1% 1.70s ± 1% +0.56% (p=0.000 n=49+50)
Compiler 122ms ± 1% 123ms ± 1% +0.93% (p=0.000 n=50+50)
SSA 12.6s ± 1% 12.7s ± 0% +0.72% (p=0.000 n=50+50)
Flate 170ms ± 1% 172ms ± 1% +0.97% (p=0.000 n=49+49)
GoParser 262ms ± 1% 263ms ± 1% +0.39% (p=0.000 n=49+48)
Reflect 639ms ± 1% 650ms ± 1% +1.63% (p=0.000 n=49+49)
Tar 243ms ± 1% 245ms ± 1% +0.82% (p=0.000 n=50+50)
XML 324ms ± 1% 327ms ± 1% +0.72% (p=0.000 n=50+49)
LinkCompiler 597ms ± 1% 596ms ± 1% -0.27% (p=0.001 n=48+47)
ExternalLinkCompiler 1.90s ± 1% 1.88s ± 1% -1.00% (p=0.000 n=50+50)
LinkWithoutDebugCompiler 364ms ± 1% 363ms ± 1% ~ (p=0.220 n=49+50)
[Geo mean] 485ms 488ms +0.49%
name old alloc/op new alloc/op delta
Template 38.7MB ± 0% 38.8MB ± 1% ~ (p=0.093 n=43+49)
Unicode 28.4MB ± 0% 28.4MB ± 0% +0.03% (p=0.000 n=49+45)
GoTypes 169MB ± 1% 169MB ± 1% +0.23% (p=0.010 n=50+50)
Compiler 23.2MB ± 1% 23.2MB ± 1% +0.11% (p=0.000 n=40+44)
SSA 1.54GB ± 0% 1.55GB ± 0% +0.45% (p=0.000 n=47+49)
Flate 23.8MB ± 2% 23.8MB ± 1% ~ (p=0.543 n=50+50)
GoParser 35.3MB ± 1% 35.4MB ± 1% ~ (p=0.792 n=50+50)
Reflect 85.2MB ± 1% 85.2MB ± 0% ~ (p=0.055 n=50+47)
Tar 34.5MB ± 1% 34.5MB ± 1% +0.06% (p=0.015 n=50+50)
XML 43.8MB ± 2% 43.9MB ± 2% +0.19% (p=0.000 n=48+48)
LinkCompiler 137MB ± 0% 136MB ± 0% -0.92% (p=0.000 n=50+50)
ExternalLinkCompiler 127MB ± 0% 127MB ± 0% ~ (p=0.516 n=50+50)
LinkWithoutDebugCompiler 84.0MB ± 0% 84.0MB ± 0% ~ (p=0.057 n=50+50)
[Geo mean] 70.4MB 70.4MB +0.01%
file before after Δ %
addr2line 4021557 4002933 -18624 -0.463%
api 5127847 5028503 -99344 -1.937%
asm 5034716 4936836 -97880 -1.944%
buildid 2608118 2594094 -14024 -0.538%
cgo 4488592 4398320 -90272 -2.011%
compile 22501129 22213592 -287537 -1.278%
cover 4742301 4713573 -28728 -0.606%
dist 3388071 3365311 -22760 -0.672%
doc 3802250 3776082 -26168 -0.688%
fix 3306147 3216939 -89208 -2.698%
link 6404483 6363699 -40784 -0.637%
nm 3941026 3921930 -19096 -0.485%
objdump 4383330 4295122 -88208 -2.012%
pack 2404547 2389515 -15032 -0.625%
pprof 12996234 12856818 -139416 -1.073%
test2json 2668500 2586788 -81712 -3.062%
trace 9816276 9609580 -206696 -2.106%
vet 6900682 6787338 -113344 -1.643%
total 108535806 107056973 -1478833 -1.363%
Change-Id: Iaec1cdcaacca8025e9babb0fb8a532fddb70c87d
Reviewed-on: https://go-review.googlesource.com/c/go/+/255239
Reviewed-by: eric fang <eric.fang@arm.com>
Reviewed-by: Keith Randall <khr@golang.org>
Trust: eric fang <eric.fang@arm.com>
2021-03-16 02:44:54 +00:00
Agniva De Sarker
8acbe4c0b3
cmd/compile: optimize unsigned comparisons with 0/1 on wasm
...
Updates #21439
Change-Id: I0fbcde6e0c2fc368fe686b271670f9d8be4a7900
Reviewed-on: https://go-review.googlesource.com/c/go/+/247557
Run-TryBot: Agniva De Sarker <agniva.quicksilver@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2020-08-22 12:35:47 +00:00
Junchen Li
06337823ef
cmd/compile: optimize unsigned comparisons to 0/1 on arm64
...
For an unsigned integer, it's useful to convert its order test with 0/1
to its equality test with 0. We can save a comparison instruction that
followed by a conditional branch on arm64 since it supports
compare-with-zero-and-branch instructions. For example,
if x > 0 { ... } else { ... }
the original version:
CMP $0, R0
BLS 9
the optimized version:
CBZ R0, 8
Updates #21439
Change-Id: Id1de6f865f6aa72c5d45b29f7894818857288425
Reviewed-on: https://go-review.googlesource.com/c/go/+/246857
Reviewed-by: Keith Randall <khr@golang.org>
2020-08-18 04:09:13 +00:00
Junchen Li
e30fbe3757
cmd/compile: optimize unsigned comparisons to 0
...
There are some architecture-independent rules in #21439 , since an
unsigned integer >= 0 is always true and < 0 is always false. This CL
adds these optimizations to generic rules.
Updates #21439
Change-Id: Iec7e3040b761ecb1e60908f764815fdd9bc62495
Reviewed-on: https://go-review.googlesource.com/c/go/+/246617
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2020-08-17 20:06:35 +00:00
Xiangdong Ji
e031318ca6
cmd/compile: ARM comparisons with 0 incorrect on overflow
...
Some ARM rewriting rules convert 'comparing to zero' conditions of if
statements to a simplified version utilizing CMN and CMP instructions to
branch over condition flags, in order to save one Add or Sub caculation.
Such optimizations lead to wrong branching in case an overflow/underflow
occurs when executing CMN or CMP.
Fix the issue by introducing new block opcodes that don't honor the
overflow/underflow flag:
Block-Op Meaning ARM condition codes
1. LTnoov less than MI
2. GEnoov greater than or equal PL
3. LEnoov less than or equal MI || EQ
4. GTnoov greater than NEQ & PL
The patch also adds a few test cases to cover scenarios that are specific
to ARM and fine-tunes the code generation tests for 'x-const'.
For more details please refer to the previous fix on 64-bit ARM:
https://go-review.googlesource.com/c/go/+/233097
Go1 perf, 'old' is the non-optimized version, that is removing all concerned
rewriting rules.
name old time/op new time/op delta
BinaryTree17-8 7.73s ± 0% 7.81s ± 0% +0.97% (p=0.000 n=7+8)
Fannkuch11-8 7.06s ± 0% 7.00s ± 0% -0.83% (p=0.000 n=8+8)
FmtFprintfEmpty-8 181ns ± 1% 183ns ± 1% +1.31% (p=0.001 n=8+8)
FmtFprintfString-8 319ns ± 1% 325ns ± 2% +1.71% (p=0.009 n=7+8)
FmtFprintfInt-8 358ns ± 1% 359ns ± 1% ~ (p=0.293 n=7+7)
FmtFprintfIntInt-8 459ns ± 3% 456ns ± 1% ~ (p=0.869 n=8+8)
FmtFprintfPrefixedInt-8 535ns ± 4% 538ns ± 4% ~ (p=0.572 n=8+8)
FmtFprintfFloat-8 1.01µs ± 2% 1.01µs ± 2% ~ (p=0.625 n=8+8)
FmtManyArgs-8 1.93µs ± 2% 1.93µs ± 1% ~ (p=0.979 n=8+7)
GobDecode-8 16.1ms ± 1% 16.5ms ± 1% +2.32% (p=0.000 n=8+8)
GobEncode-8 15.9ms ± 0% 15.8ms ± 1% -1.00% (p=0.000 n=8+7)
Gzip-8 690ms ± 1% 670ms ± 0% -2.90% (p=0.000 n=8+8)
Gunzip-8 109ms ± 1% 109ms ± 1% ~ (p=0.694 n=7+8)
HTTPClientServer-8 149µs ± 3% 146µs ± 2% -1.70% (p=0.028 n=8+8)
JSONEncode-8 50.5ms ± 1% 49.2ms ± 0% -2.60% (p=0.001 n=7+7)
JSONDecode-8 135ms ± 2% 137ms ± 1% ~ (p=0.054 n=8+7)
Mandelbrot200-8 951ms ± 0% 952ms ± 0% ~ (p=0.852 n=6+8)
GoParse-8 9.47ms ± 1% 9.66ms ± 1% +2.01% (p=0.000 n=8+8)
RegexpMatchEasy0_32-8 288ns ± 2% 277ns ± 2% -3.61% (p=0.000 n=8+8)
RegexpMatchEasy0_1K-8 1.66µs ± 1% 1.69µs ± 2% +2.21% (p=0.001 n=7+7)
RegexpMatchEasy1_32-8 334ns ± 1% 305ns ± 2% -8.86% (p=0.000 n=8+8)
RegexpMatchEasy1_1K-8 2.14µs ± 2% 2.15µs ± 0% ~ (p=0.099 n=8+8)
RegexpMatchMedium_32-8 13.3ns ± 1% 13.3ns ± 0% ~ (p=1.000 n=7+7)
RegexpMatchMedium_1K-8 81.1µs ± 3% 80.7µs ± 1% ~ (p=0.955 n=7+8)
RegexpMatchHard_32-8 4.26µs ± 0% 4.26µs ± 0% ~ (p=0.933 n=7+8)
RegexpMatchHard_1K-8 124µs ± 0% 124µs ± 0% +0.31% (p=0.000 n=8+8)
Revcomp-8 14.7ms ± 2% 14.5ms ± 1% -1.66% (p=0.003 n=8+8)
Template-8 197ms ± 2% 200ms ± 3% +1.62% (p=0.021 n=8+8)
TimeParse-8 1.33µs ± 1% 1.30µs ± 1% -1.86% (p=0.002 n=8+8)
TimeFormat-8 3.04µs ± 1% 3.02µs ± 0% -0.60% (p=0.000 n=8+8)
name old speed new speed delta
GobDecode-8 47.6MB/s ± 1% 46.5MB/s ± 1% -2.28% (p=0.000 n=8+8)
GobEncode-8 48.1MB/s ± 0% 48.6MB/s ± 1% +1.02% (p=0.000 n=8+7)
Gzip-8 28.1MB/s ± 1% 29.0MB/s ± 0% +2.97% (p=0.000 n=8+8)
Gunzip-8 178MB/s ± 1% 179MB/s ± 2% ~ (p=0.694 n=7+8)
JSONEncode-8 38.4MB/s ± 1% 39.4MB/s ± 0% +2.67% (p=0.001 n=7+7)
JSONDecode-8 14.3MB/s ± 2% 14.2MB/s ± 1% -0.81% (p=0.043 n=8+7)
GoParse-8 6.12MB/s ± 1% 5.99MB/s ± 1% -2.00% (p=0.000 n=8+8)
RegexpMatchEasy0_32-8 111MB/s ± 2% 115MB/s ± 2% +3.77% (p=0.000 n=8+8)
RegexpMatchEasy0_1K-8 618MB/s ± 1% 604MB/s ± 2% -2.16% (p=0.001 n=7+7)
RegexpMatchEasy1_32-8 95.7MB/s ± 1% 105.1MB/s ± 2% +9.76% (p=0.000 n=8+8)
RegexpMatchEasy1_1K-8 479MB/s ± 2% 477MB/s ± 0% ~ (p=0.105 n=8+8)
RegexpMatchMedium_32-8 75.2MB/s ± 1% 75.2MB/s ± 0% ~ (p=0.247 n=7+7)
RegexpMatchMedium_1K-8 12.6MB/s ± 3% 12.7MB/s ± 1% ~ (p=0.538 n=7+8)
RegexpMatchHard_32-8 7.52MB/s ± 0% 7.52MB/s ± 0% ~ (p=0.968 n=7+8)
RegexpMatchHard_1K-8 8.26MB/s ± 0% 8.24MB/s ± 0% -0.30% (p=0.001 n=8+8)
Revcomp-8 173MB/s ± 2% 176MB/s ± 1% +1.68% (p=0.003 n=8+8)
Template-8 9.85MB/s ± 2% 9.69MB/s ± 3% -1.59% (p=0.021 n=8+8)
Fixes #39303
Updates #38740
Change-Id: I0a5f87bfda679f66414c0041ace2ca2e28363f36
Reviewed-on: https://go-review.googlesource.com/c/go/+/236637
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2020-06-09 15:50:33 +00:00
Xiangdong Ji
e8f5a33191
cmd/compile: fix incorrect rewriting to if condition
...
Some ARM64 rewriting rules convert 'comparing to zero' conditions of if
statements to a simplified version utilizing CMN and CMP instructions to
branch over condition flags, in order to save one Add or Sub caculation.
Such optimizations lead to wrong branching in case an overflow/underflow
occurs when executing CMN or CMP.
Fix the issue by introducing new block opcodes that don't honor the
overflow/underflow flag, in the following categories:
Block-Op Meaning ARM condition codes
1. LTnoov less than MI
2. GEnoov greater than or equal PL
3. LEnoov less than or equal MI || EQ
4. GTnoov greater than NEQ & PL
The backend generates two consecutive branch instructions for 'LEnoov'
and 'GTnoov' to model their expected behavior. A slight change to 'gc'
and amd64/386 backends is made to unify the code generation.
Add a test 'TestCondRewrite' as justification, it covers 32 incorrect rules
identified on arm64, more might be needed on other arches, like 32-bit arm.
Add two benchmarks profiling the aforementioned category 1&2 and category
3&4 separetely, we expect the first two categories will show performance
improvement and the second will not result in visible regression compared with
the non-optimized version.
This change also updates TestFormats to support using %#x.
Examples exhibiting where does the issue come from:
1: 'if x + 3 < 0' might be converted to:
before:
CMN $3, R0
BGE <else branch> // wrong branch is taken if 'x+3' overflows
after:
CMN $3, R0
BPL <else branch>
2: 'if y - 3 > 0' might be converted to:
before:
CMP $3, R0
BLE <else branch> // wrong branch is taken if 'y-3' underflows
after:
CMP $3, R0
BMI <else branch>
BEQ <else branch>
Benchmark data from different kinds of arm64 servers, 'old' is the non-optimized
version (not the parent commit), generally the optimization version outperforms.
S1:
name old time/op new time/op delta
CondRewrite/SoloJump 13.6ns ± 0% 12.9ns ± 0% -5.15% (p=0.000 n=10+10)
CondRewrite/CombJump 13.8ns ± 1% 12.9ns ± 0% -6.32% (p=0.000 n=10+10)
S2:
name old time/op new time/op delta
CondRewrite/SoloJump 11.6ns ± 0% 10.9ns ± 0% -6.03% (p=0.000 n=10+10)
CondRewrite/CombJump 11.4ns ± 0% 10.8ns ± 1% -5.53% (p=0.000 n=10+10)
S3:
name old time/op new time/op delta
CondRewrite/SoloJump 7.36ns ± 0% 7.50ns ± 0% +1.79% (p=0.000 n=9+10)
CondRewrite/CombJump 7.35ns ± 0% 7.75ns ± 0% +5.51% (p=0.000 n=8+9)
S4:
name old time/op new time/op delta
CondRewrite/SoloJump-224 11.5ns ± 1% 10.9ns ± 0% -4.97% (p=0.000 n=10+10)
CondRewrite/CombJump-224 11.9ns ± 0% 11.5ns ± 0% -2.95% (p=0.000 n=10+10)
S5:
name old time/op new time/op delta
CondRewrite/SoloJump 10.0ns ± 0% 10.0ns ± 0% -0.45% (p=0.000 n=9+10)
CondRewrite/CombJump 9.93ns ± 0% 9.77ns ± 0% -1.53% (p=0.000 n=10+9)
Go1 perf. data:
name old time/op new time/op delta
BinaryTree17 6.29s ± 1% 6.30s ± 1% ~ (p=1.000 n=5+5)
Fannkuch11 5.40s ± 0% 5.40s ± 0% ~ (p=0.841 n=5+5)
FmtFprintfEmpty 97.9ns ± 0% 98.9ns ± 3% ~ (p=0.937 n=4+5)
FmtFprintfString 171ns ± 3% 171ns ± 2% ~ (p=0.754 n=5+5)
FmtFprintfInt 212ns ± 0% 217ns ± 6% +2.55% (p=0.008 n=5+5)
FmtFprintfIntInt 296ns ± 1% 297ns ± 2% ~ (p=0.516 n=5+5)
FmtFprintfPrefixedInt 371ns ± 2% 374ns ± 7% ~ (p=1.000 n=5+5)
FmtFprintfFloat 435ns ± 1% 439ns ± 2% ~ (p=0.056 n=5+5)
FmtManyArgs 1.37µs ± 1% 1.36µs ± 1% ~ (p=0.730 n=5+5)
GobDecode 14.6ms ± 4% 14.4ms ± 4% ~ (p=0.690 n=5+5)
GobEncode 11.8ms ±20% 11.6ms ±15% ~ (p=1.000 n=5+5)
Gzip 507ms ± 0% 491ms ± 0% -3.22% (p=0.008 n=5+5)
Gunzip 73.8ms ± 0% 73.9ms ± 0% ~ (p=0.690 n=5+5)
HTTPClientServer 116µs ± 0% 116µs ± 0% ~ (p=0.686 n=4+4)
JSONEncode 21.8ms ± 1% 21.6ms ± 2% ~ (p=0.151 n=5+5)
JSONDecode 104ms ± 1% 103ms ± 1% -1.08% (p=0.016 n=5+5)
Mandelbrot200 9.53ms ± 0% 9.53ms ± 0% ~ (p=0.421 n=5+5)
GoParse 7.55ms ± 1% 7.51ms ± 1% ~ (p=0.151 n=5+5)
RegexpMatchEasy0_32 158ns ± 0% 158ns ± 0% ~ (all equal)
RegexpMatchEasy0_1K 606ns ± 1% 608ns ± 3% ~ (p=0.937 n=5+5)
RegexpMatchEasy1_32 143ns ± 0% 144ns ± 1% ~ (p=0.095 n=5+4)
RegexpMatchEasy1_1K 927ns ± 2% 944ns ± 2% ~ (p=0.056 n=5+5)
RegexpMatchMedium_32 16.0ns ± 0% 16.0ns ± 0% ~ (all equal)
RegexpMatchMedium_1K 69.3µs ± 2% 69.7µs ± 0% ~ (p=0.690 n=5+5)
RegexpMatchHard_32 3.73µs ± 0% 3.73µs ± 1% ~ (p=0.984 n=5+5)
RegexpMatchHard_1K 111µs ± 1% 110µs ± 0% ~ (p=0.151 n=5+5)
Revcomp 1.91s ±47% 1.77s ±68% ~ (p=1.000 n=5+5)
Template 138ms ± 1% 138ms ± 1% ~ (p=1.000 n=5+5)
TimeParse 787ns ± 2% 785ns ± 1% ~ (p=0.540 n=5+5)
TimeFormat 729ns ± 1% 726ns ± 1% ~ (p=0.151 n=5+5)
Updates #38740
Change-Id: I06c604874acdc1e63e66452dadee5df053045222
Reviewed-on: https://go-review.googlesource.com/c/go/+/233097
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
2020-05-29 15:39:54 +00:00
Agniva De Sarker
ecc7dd5469
test/codegen: fix wasm codegen breakage
...
i32.eqz instructions don't appear unless needed in if conditions anymore
after CL 195204. I forgot to run the codegen tests while submitting the CL.
Thanks to @martisch for catching it.
Fixes #34442
Change-Id: I177b064b389be48e39d564849714d7a8839be13e
Reviewed-on: https://go-review.googlesource.com/c/go/+/196580
Run-TryBot: Agniva De Sarker <agniva.quicksilver@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Martin Möhrmann <moehrmann@google.com>
2019-09-21 16:31:44 +00:00
Ben Shi
1786ecd502
cmd/compile: eliminate WASM's redundant extension & wrapping
...
This CL eliminates unnecessary pairs of I32WrapI64 and
I64ExtendI32U generated by the WASM backend for IF
statements. And it makes the total size of pkg/js_wasm/
decreases about 490KB.
Change-Id: I16b0abb686c4e30d5624323166ec2d0ec57dbe2d
Reviewed-on: https://go-review.googlesource.com/c/go/+/191758
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Richard Musiol <neelance@gmail.com>
2019-08-30 21:20:03 +00:00
Josh Bleecher Snyder
870cfe6484
test/codegen: gofmt
...
Change-Id: I33f5b5051e5f75aa264ec656926223c5a3c09c1b
Reviewed-on: https://go-review.googlesource.com/c/go/+/167498
Run-TryBot: Josh Bleecher Snyder <josharian@gmail.com>
Reviewed-by: Matt Layher <mdlayher@gmail.com>
2019-03-13 21:44:45 +00:00
Lynn Boger
4ae49b5921
cmd/compile: use ANDCC, ORCC, XORCC to avoid CMP on ppc64x
...
This change makes use of the cc versions of the AND, OR, XOR
instructions, omitting the need for a CMP instruction.
In many test programs and in the go binary, this reduces the
size of 20-30 functions by at least 1 instruction, many in
runtime.
Testcase added to test/codegen/comparisons.go
Change-Id: I6cc1ca8b80b065d7390749c625bc9784b0039adb
Reviewed-on: https://go-review.googlesource.com/c/143059
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
Reviewed-by: Michael Munday <mike.munday@ibm.com>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-11-09 19:40:52 +00:00
Lynn Boger
39fa301bdc
test/codegen: enable more tests for ppc64/ppc64le
...
Adding cases for ppc64,ppc64le to the codegen tests
where appropriate.
Change-Id: Idf8cbe88a4ab4406a4ef1ea777bd15a58b68f3ed
Reviewed-on: https://go-review.googlesource.com/c/142557
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
2018-10-16 19:00:53 +00:00
Ben Shi
5aeecc4530
cmd/compile: optimize arm64's code with more shifted operations
...
This CL optimizes arm64's NEG/MVN/TST/CMN with a shifted operand.
1. The total size of pkg/android_arm64 decreases about 0.2KB, excluding
cmd/compile/ .
2. The go1 benchmark shows no regression, excluding noise.
name old time/op new time/op delta
BinaryTree17-4 16.4s ± 1% 16.4s ± 1% ~ (p=0.914 n=29+29)
Fannkuch11-4 8.72s ± 0% 8.72s ± 0% ~ (p=0.274 n=30+29)
FmtFprintfEmpty-4 174ns ± 0% 174ns ± 0% ~ (all equal)
FmtFprintfString-4 370ns ± 0% 370ns ± 0% ~ (all equal)
FmtFprintfInt-4 419ns ± 0% 419ns ± 0% ~ (all equal)
FmtFprintfIntInt-4 672ns ± 1% 675ns ± 2% ~ (p=0.217 n=28+30)
FmtFprintfPrefixedInt-4 806ns ± 0% 806ns ± 0% ~ (p=0.402 n=30+28)
FmtFprintfFloat-4 1.09µs ± 0% 1.09µs ± 0% +0.02% (p=0.011 n=22+27)
FmtManyArgs-4 2.67µs ± 0% 2.68µs ± 0% ~ (p=0.279 n=29+30)
GobDecode-4 33.1ms ± 1% 33.1ms ± 0% ~ (p=0.052 n=28+29)
GobEncode-4 29.6ms ± 0% 29.6ms ± 0% +0.08% (p=0.013 n=28+29)
Gzip-4 1.38s ± 2% 1.39s ± 2% ~ (p=0.071 n=29+29)
Gunzip-4 139ms ± 0% 139ms ± 0% ~ (p=0.265 n=29+29)
HTTPClientServer-4 789µs ± 4% 785µs ± 4% ~ (p=0.206 n=29+28)
JSONEncode-4 49.7ms ± 0% 49.6ms ± 0% -0.24% (p=0.000 n=30+30)
JSONDecode-4 266ms ± 1% 267ms ± 1% +0.34% (p=0.000 n=30+30)
Mandelbrot200-4 16.6ms ± 0% 16.6ms ± 0% ~ (p=0.835 n=28+30)
GoParse-4 15.9ms ± 0% 15.8ms ± 0% -0.29% (p=0.000 n=27+30)
RegexpMatchEasy0_32-4 380ns ± 0% 381ns ± 0% +0.18% (p=0.000 n=30+30)
RegexpMatchEasy0_1K-4 1.18µs ± 0% 1.19µs ± 0% +0.23% (p=0.000 n=30+30)
RegexpMatchEasy1_32-4 357ns ± 0% 358ns ± 0% +0.28% (p=0.000 n=29+29)
RegexpMatchEasy1_1K-4 2.04µs ± 0% 2.04µs ± 0% +0.06% (p=0.006 n=30+30)
RegexpMatchMedium_32-4 589ns ± 0% 590ns ± 0% +0.24% (p=0.000 n=28+30)
RegexpMatchMedium_1K-4 162µs ± 0% 162µs ± 0% -0.01% (p=0.027 n=26+29)
RegexpMatchHard_32-4 9.58µs ± 0% 9.58µs ± 0% ~ (p=0.935 n=30+30)
RegexpMatchHard_1K-4 287µs ± 0% 287µs ± 0% ~ (p=0.387 n=29+30)
Revcomp-4 2.50s ± 0% 2.50s ± 0% -0.10% (p=0.020 n=28+28)
Template-4 310ms ± 0% 310ms ± 1% ~ (p=0.406 n=30+30)
TimeParse-4 1.68µs ± 0% 1.68µs ± 0% +0.03% (p=0.014 n=30+17)
TimeFormat-4 1.65µs ± 0% 1.66µs ± 0% +0.32% (p=0.000 n=27+29)
[Geo mean] 247µs 247µs +0.05%
name old speed new speed delta
GobDecode-4 23.2MB/s ± 0% 23.2MB/s ± 0% -0.08% (p=0.032 n=27+29)
GobEncode-4 26.0MB/s ± 0% 25.9MB/s ± 0% -0.10% (p=0.011 n=29+29)
Gzip-4 14.1MB/s ± 2% 14.0MB/s ± 2% ~ (p=0.081 n=29+29)
Gunzip-4 139MB/s ± 0% 139MB/s ± 0% ~ (p=0.290 n=29+29)
JSONEncode-4 39.0MB/s ± 0% 39.1MB/s ± 0% +0.25% (p=0.000 n=29+30)
JSONDecode-4 7.30MB/s ± 1% 7.28MB/s ± 1% -0.33% (p=0.000 n=30+30)
GoParse-4 3.65MB/s ± 0% 3.66MB/s ± 0% +0.29% (p=0.000 n=27+30)
RegexpMatchEasy0_32-4 84.1MB/s ± 0% 84.0MB/s ± 0% -0.17% (p=0.000 n=30+28)
RegexpMatchEasy0_1K-4 864MB/s ± 0% 862MB/s ± 0% -0.24% (p=0.000 n=30+30)
RegexpMatchEasy1_32-4 89.5MB/s ± 0% 89.3MB/s ± 0% -0.18% (p=0.000 n=28+24)
RegexpMatchEasy1_1K-4 502MB/s ± 0% 502MB/s ± 0% -0.05% (p=0.008 n=30+29)
RegexpMatchMedium_32-4 1.70MB/s ± 0% 1.69MB/s ± 0% -0.59% (p=0.000 n=29+30)
RegexpMatchMedium_1K-4 6.31MB/s ± 0% 6.31MB/s ± 0% +0.05% (p=0.005 n=30+26)
RegexpMatchHard_32-4 3.34MB/s ± 0% 3.34MB/s ± 0% ~ (all equal)
RegexpMatchHard_1K-4 3.57MB/s ± 0% 3.57MB/s ± 0% ~ (all equal)
Revcomp-4 102MB/s ± 0% 102MB/s ± 0% +0.10% (p=0.022 n=28+28)
Template-4 6.26MB/s ± 0% 6.26MB/s ± 1% ~ (p=0.768 n=30+30)
[Geo mean] 24.2MB/s 24.1MB/s -0.08%
Change-Id: I494f9db7f8a568a00e9c74ae25086a58b2221683
Reviewed-on: https://go-review.googlesource.com/137976
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-09-28 15:05:17 +00:00
Ben Shi
031a35ec84
cmd/compile: optimize 386's comparison
...
Optimization of "(CMPconst [0] (ANDL x y)) -> (TESTL x y)" only
get benefits if there is no further use of the result of x&y. A
condition of uses==1 will have slight improvements.
1. The code size of pkg/linux_386 decreases about 300 bytes, excluding
cmd/compile/.
2. The go1 benchmark shows no regression, and even a slight improvement
in test case FmtFprintfEmpty-4, excluding noise.
name old time/op new time/op delta
BinaryTree17-4 3.34s ± 3% 3.32s ± 2% ~ (p=0.197 n=30+30)
Fannkuch11-4 3.48s ± 2% 3.47s ± 1% -0.33% (p=0.015 n=30+30)
FmtFprintfEmpty-4 46.3ns ± 4% 44.8ns ± 4% -3.33% (p=0.000 n=30+30)
FmtFprintfString-4 78.8ns ± 7% 77.3ns ± 5% ~ (p=0.098 n=30+26)
FmtFprintfInt-4 90.2ns ± 1% 90.0ns ± 7% -0.23% (p=0.027 n=18+30)
FmtFprintfIntInt-4 144ns ± 4% 143ns ± 5% ~ (p=0.945 n=30+29)
FmtFprintfPrefixedInt-4 180ns ± 4% 180ns ± 5% ~ (p=0.858 n=30+30)
FmtFprintfFloat-4 409ns ± 4% 406ns ± 3% -0.87% (p=0.028 n=30+30)
FmtManyArgs-4 611ns ± 5% 608ns ± 4% ~ (p=0.812 n=30+30)
GobDecode-4 7.30ms ± 5% 7.26ms ± 5% ~ (p=0.522 n=30+29)
GobEncode-4 6.90ms ± 7% 6.82ms ± 4% ~ (p=0.086 n=29+28)
Gzip-4 396ms ± 4% 400ms ± 4% +0.99% (p=0.026 n=30+30)
Gunzip-4 41.1ms ± 3% 41.2ms ± 3% ~ (p=0.495 n=30+30)
HTTPClientServer-4 63.7µs ± 3% 63.3µs ± 2% ~ (p=0.113 n=29+29)
JSONEncode-4 16.1ms ± 2% 16.1ms ± 2% -0.30% (p=0.041 n=30+30)
JSONDecode-4 60.9ms ± 3% 61.2ms ± 6% ~ (p=0.187 n=30+30)
Mandelbrot200-4 5.17ms ± 2% 5.19ms ± 3% ~ (p=0.676 n=30+30)
GoParse-4 3.28ms ± 3% 3.25ms ± 2% -0.97% (p=0.002 n=30+30)
RegexpMatchEasy0_32-4 103ns ± 4% 104ns ± 4% ~ (p=0.352 n=30+30)
RegexpMatchEasy0_1K-4 849ns ± 2% 845ns ± 2% ~ (p=0.381 n=30+30)
RegexpMatchEasy1_32-4 113ns ± 4% 113ns ± 4% ~ (p=0.795 n=30+30)
RegexpMatchEasy1_1K-4 1.03µs ± 3% 1.03µs ± 4% ~ (p=0.275 n=25+30)
RegexpMatchMedium_32-4 132ns ± 3% 132ns ± 3% ~ (p=0.970 n=30+30)
RegexpMatchMedium_1K-4 41.4µs ± 3% 41.4µs ± 3% ~ (p=0.212 n=30+30)
RegexpMatchHard_32-4 2.22µs ± 4% 2.22µs ± 4% ~ (p=0.399 n=30+30)
RegexpMatchHard_1K-4 67.2µs ± 3% 67.6µs ± 4% ~ (p=0.359 n=30+30)
Revcomp-4 1.84s ± 2% 1.83s ± 2% ~ (p=0.532 n=30+30)
Template-4 69.1ms ± 4% 68.8ms ± 3% ~ (p=0.146 n=30+30)
TimeParse-4 441ns ± 3% 442ns ± 3% ~ (p=0.154 n=30+30)
TimeFormat-4 413ns ± 3% 414ns ± 3% ~ (p=0.275 n=30+30)
[Geo mean] 66.2µs 66.0µs -0.28%
name old speed new speed delta
GobDecode-4 105MB/s ± 5% 106MB/s ± 5% ~ (p=0.514 n=30+29)
GobEncode-4 111MB/s ± 5% 113MB/s ± 4% +1.37% (p=0.046 n=28+28)
Gzip-4 49.1MB/s ± 4% 48.6MB/s ± 4% -0.98% (p=0.028 n=30+30)
Gunzip-4 472MB/s ± 4% 472MB/s ± 3% ~ (p=0.496 n=30+30)
JSONEncode-4 120MB/s ± 2% 121MB/s ± 2% +0.29% (p=0.042 n=30+30)
JSONDecode-4 31.9MB/s ± 3% 31.7MB/s ± 6% ~ (p=0.186 n=30+30)
GoParse-4 17.6MB/s ± 3% 17.8MB/s ± 2% +0.98% (p=0.002 n=30+30)
RegexpMatchEasy0_32-4 309MB/s ± 4% 307MB/s ± 4% ~ (p=0.501 n=30+30)
RegexpMatchEasy0_1K-4 1.21GB/s ± 2% 1.21GB/s ± 2% ~ (p=0.301 n=30+30)
RegexpMatchEasy1_32-4 283MB/s ± 4% 282MB/s ± 3% ~ (p=0.877 n=30+30)
RegexpMatchEasy1_1K-4 1.00GB/s ± 3% 0.99GB/s ± 4% ~ (p=0.276 n=25+30)
RegexpMatchMedium_32-4 7.54MB/s ± 3% 7.55MB/s ± 3% ~ (p=0.528 n=30+30)
RegexpMatchMedium_1K-4 24.7MB/s ± 3% 24.7MB/s ± 3% ~ (p=0.203 n=30+30)
RegexpMatchHard_32-4 14.4MB/s ± 4% 14.4MB/s ± 4% ~ (p=0.407 n=30+30)
RegexpMatchHard_1K-4 15.3MB/s ± 3% 15.1MB/s ± 4% ~ (p=0.306 n=30+30)
Revcomp-4 138MB/s ± 2% 139MB/s ± 2% ~ (p=0.520 n=30+30)
Template-4 28.1MB/s ± 4% 28.2MB/s ± 3% ~ (p=0.149 n=30+30)
[Geo mean] 81.5MB/s 81.5MB/s +0.06%
Change-Id: I7f75425f79eec93cdd8fdd94db13ad4f61b6a2f5
Reviewed-on: https://go-review.googlesource.com/133657
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-09-07 01:20:45 +00:00
Ben Shi
067dfce21f
cmd/compile: optimize ARM's comparision
...
Optimize (CMPconst [0] (ADD x y)) to (CMN x y) will only get benefits
when the result of the addition is no longer used, otherwise there
might be even performance drop. And this CL fixes that issue for
CMP/CMN/TST/TEQ.
There is little regression in the go1 benchmark (excluding noise),
and the test case JSONDecode-4 even gets improvement.
name old time/op new time/op delta
BinaryTree17-4 21.6s ± 1% 21.6s ± 0% -0.22% (p=0.013 n=30+30)
Fannkuch11-4 11.1s ± 0% 11.1s ± 0% +0.11% (p=0.000 n=30+29)
FmtFprintfEmpty-4 297ns ± 0% 297ns ± 0% +0.08% (p=0.007 n=26+28)
FmtFprintfString-4 589ns ± 1% 589ns ± 0% ~ (p=0.659 n=30+25)
FmtFprintfInt-4 644ns ± 1% 650ns ± 0% +0.88% (p=0.000 n=30+24)
FmtFprintfIntInt-4 964ns ± 0% 977ns ± 0% +1.33% (p=0.000 n=30+30)
FmtFprintfPrefixedInt-4 1.06µs ± 0% 1.07µs ± 0% +1.31% (p=0.000 n=29+27)
FmtFprintfFloat-4 1.89µs ± 0% 1.92µs ± 0% +1.25% (p=0.000 n=29+29)
FmtManyArgs-4 3.63µs ± 0% 3.67µs ± 0% +1.33% (p=0.000 n=29+27)
GobDecode-4 38.1ms ± 1% 37.9ms ± 1% -0.60% (p=0.000 n=29+29)
GobEncode-4 35.3ms ± 2% 35.2ms ± 1% ~ (p=0.286 n=30+30)
Gzip-4 2.36s ± 0% 2.37s ± 2% ~ (p=0.277 n=24+28)
Gunzip-4 264ms ± 1% 264ms ± 1% ~ (p=0.104 n=28+30)
HTTPClientServer-4 1.04ms ± 4% 1.02ms ± 4% -1.65% (p=0.000 n=28+28)
JSONEncode-4 78.5ms ± 1% 79.6ms ± 1% +1.34% (p=0.000 n=27+28)
JSONDecode-4 379ms ± 4% 352ms ± 5% -7.09% (p=0.000 n=29+30)
Mandelbrot200-4 17.6ms ± 0% 17.6ms ± 0% ~ (p=0.206 n=28+29)
GoParse-4 21.9ms ± 1% 22.1ms ± 1% +0.87% (p=0.000 n=28+26)
RegexpMatchEasy0_32-4 631ns ± 0% 641ns ± 0% +1.63% (p=0.000 n=29+30)
RegexpMatchEasy0_1K-4 4.11µs ± 0% 4.11µs ± 0% ~ (p=0.700 n=30+30)
RegexpMatchEasy1_32-4 670ns ± 0% 679ns ± 0% +1.37% (p=0.000 n=21+30)
RegexpMatchEasy1_1K-4 5.31µs ± 0% 5.26µs ± 0% -1.03% (p=0.000 n=25+28)
RegexpMatchMedium_32-4 905ns ± 0% 906ns ± 0% +0.14% (p=0.001 n=30+30)
RegexpMatchMedium_1K-4 192µs ± 0% 191µs ± 0% -0.45% (p=0.000 n=29+27)
RegexpMatchHard_32-4 11.8µs ± 0% 11.7µs ± 0% -0.39% (p=0.000 n=29+28)
RegexpMatchHard_1K-4 347µs ± 0% 347µs ± 0% ~ (p=0.084 n=29+30)
Revcomp-4 37.5ms ± 1% 37.5ms ± 1% ~ (p=0.279 n=29+29)
Template-4 519ms ± 2% 519ms ± 2% ~ (p=0.652 n=28+29)
TimeParse-4 2.83µs ± 0% 2.78µs ± 0% -1.90% (p=0.000 n=27+28)
TimeFormat-4 5.79µs ± 0% 5.60µs ± 0% -3.23% (p=0.000 n=29+29)
[Geo mean] 331µs 330µs -0.16%
name old speed new speed delta
GobDecode-4 20.1MB/s ± 1% 20.3MB/s ± 1% +0.61% (p=0.000 n=29+29)
GobEncode-4 21.7MB/s ± 2% 21.8MB/s ± 1% ~ (p=0.294 n=30+30)
Gzip-4 8.23MB/s ± 1% 8.20MB/s ± 2% ~ (p=0.099 n=26+28)
Gunzip-4 73.5MB/s ± 1% 73.4MB/s ± 1% ~ (p=0.107 n=28+30)
JSONEncode-4 24.7MB/s ± 1% 24.4MB/s ± 1% -1.32% (p=0.000 n=27+28)
JSONDecode-4 5.13MB/s ± 4% 5.52MB/s ± 5% +7.65% (p=0.000 n=29+30)
GoParse-4 2.65MB/s ± 1% 2.63MB/s ± 1% -0.87% (p=0.000 n=28+26)
RegexpMatchEasy0_32-4 50.7MB/s ± 0% 49.9MB/s ± 0% -1.58% (p=0.000 n=29+29)
RegexpMatchEasy0_1K-4 249MB/s ± 0% 249MB/s ± 0% ~ (p=0.342 n=30+28)
RegexpMatchEasy1_32-4 47.7MB/s ± 0% 47.1MB/s ± 0% -1.39% (p=0.000 n=26+30)
RegexpMatchEasy1_1K-4 193MB/s ± 0% 195MB/s ± 0% +1.04% (p=0.000 n=25+28)
RegexpMatchMedium_32-4 1.10MB/s ± 0% 1.10MB/s ± 0% -0.42% (p=0.000 n=30+26)
RegexpMatchMedium_1K-4 5.33MB/s ± 0% 5.36MB/s ± 0% +0.43% (p=0.000 n=29+29)
RegexpMatchHard_32-4 2.72MB/s ± 0% 2.73MB/s ± 0% +0.37% (p=0.000 n=29+30)
RegexpMatchHard_1K-4 2.95MB/s ± 0% 2.95MB/s ± 0% ~ (all equal)
Revcomp-4 67.8MB/s ± 1% 67.7MB/s ± 1% ~ (p=0.273 n=29+29)
Template-4 3.74MB/s ± 2% 3.74MB/s ± 2% ~ (p=0.665 n=28+29)
[Geo mean] 15.2MB/s 15.2MB/s +0.21%
Change-Id: Ifed1fb8cc02d5ca52c8bc6c21b6b5bf6dbb2701a
Reviewed-on: https://go-review.googlesource.com/132115
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-09-05 14:54:08 +00:00
Ben Shi
0e9f1de0b7
cmd/compile: optimize arm64's comparison
...
Add more optimization with TST/CMN.
1. A tiny benchmark shows more than 12% improvement.
TSTCMN-4 378µs ± 0% 332µs ± 0% -12.15% (p=0.000 n=30+27)
(https://github.com/benshi001/ugo1/blob/master/tstcmn_test.go )
2. There is little regression in the go1 benchmark, excluding noise.
name old time/op new time/op delta
BinaryTree17-4 19.1s ± 0% 19.1s ± 0% ~ (p=0.994 n=28+29)
Fannkuch11-4 10.0s ± 0% 10.0s ± 0% ~ (p=0.198 n=30+25)
FmtFprintfEmpty-4 233ns ± 0% 233ns ± 0% +0.14% (p=0.002 n=24+30)
FmtFprintfString-4 428ns ± 0% 428ns ± 0% ~ (all equal)
FmtFprintfInt-4 472ns ± 0% 472ns ± 0% ~ (all equal)
FmtFprintfIntInt-4 725ns ± 0% 725ns ± 0% ~ (all equal)
FmtFprintfPrefixedInt-4 889ns ± 0% 888ns ± 0% ~ (p=0.632 n=28+30)
FmtFprintfFloat-4 1.20µs ± 0% 1.20µs ± 0% +0.05% (p=0.001 n=18+30)
FmtManyArgs-4 3.00µs ± 0% 2.99µs ± 0% -0.07% (p=0.001 n=27+30)
GobDecode-4 42.1ms ± 0% 42.2ms ± 0% +0.29% (p=0.000 n=28+28)
GobEncode-4 38.6ms ± 9% 38.8ms ± 9% ~ (p=0.912 n=30+30)
Gzip-4 2.07s ± 1% 2.05s ± 1% -0.64% (p=0.000 n=29+30)
Gunzip-4 175ms ± 0% 175ms ± 0% -0.15% (p=0.001 n=30+30)
HTTPClientServer-4 872µs ± 5% 880µs ± 6% ~ (p=0.196 n=30+29)
JSONEncode-4 88.5ms ± 1% 89.8ms ± 1% +1.49% (p=0.000 n=23+24)
JSONDecode-4 393ms ± 1% 390ms ± 1% -0.89% (p=0.000 n=28+30)
Mandelbrot200-4 19.5ms ± 0% 19.5ms ± 0% ~ (p=0.405 n=29+28)
GoParse-4 19.9ms ± 0% 20.0ms ± 0% +0.27% (p=0.000 n=30+30)
RegexpMatchEasy0_32-4 431ns ± 0% 431ns ± 0% ~ (p=1.000 n=30+30)
RegexpMatchEasy0_1K-4 1.61µs ± 0% 1.61µs ± 0% ~ (p=0.527 n=26+26)
RegexpMatchEasy1_32-4 443ns ± 0% 443ns ± 0% ~ (all equal)
RegexpMatchEasy1_1K-4 2.58µs ± 1% 2.58µs ± 1% ~ (p=0.578 n=27+25)
RegexpMatchMedium_32-4 740ns ± 0% 740ns ± 0% ~ (p=0.357 n=30+30)
RegexpMatchMedium_1K-4 223µs ± 0% 223µs ± 0% +0.16% (p=0.000 n=30+29)
RegexpMatchHard_32-4 12.3µs ± 0% 12.3µs ± 0% ~ (p=0.236 n=27+27)
RegexpMatchHard_1K-4 371µs ± 0% 371µs ± 0% +0.09% (p=0.000 n=30+27)
Revcomp-4 2.85s ± 0% 2.85s ± 0% ~ (p=0.057 n=28+25)
Template-4 408ms ± 1% 409ms ± 1% ~ (p=0.117 n=29+29)
TimeParse-4 1.93µs ± 0% 1.93µs ± 0% ~ (p=0.535 n=29+28)
TimeFormat-4 1.99µs ± 0% 1.99µs ± 0% ~ (p=0.168 n=29+28)
[Geo mean] 306µs 307µs +0.07%
name old speed new speed delta
GobDecode-4 18.3MB/s ± 0% 18.2MB/s ± 0% -0.31% (p=0.000 n=28+29)
GobEncode-4 19.9MB/s ± 8% 19.8MB/s ± 9% ~ (p=0.923 n=30+30)
Gzip-4 9.39MB/s ± 1% 9.45MB/s ± 1% +0.65% (p=0.000 n=29+30)
Gunzip-4 111MB/s ± 0% 111MB/s ± 0% +0.15% (p=0.001 n=30+30)
JSONEncode-4 21.9MB/s ± 1% 21.6MB/s ± 1% -1.45% (p=0.000 n=23+23)
JSONDecode-4 4.94MB/s ± 1% 4.98MB/s ± 1% +0.84% (p=0.000 n=27+30)
GoParse-4 2.91MB/s ± 0% 2.90MB/s ± 0% -0.34% (p=0.000 n=21+22)
RegexpMatchEasy0_32-4 74.1MB/s ± 0% 74.1MB/s ± 0% ~ (p=0.469 n=29+28)
RegexpMatchEasy0_1K-4 634MB/s ± 0% 634MB/s ± 0% ~ (p=0.978 n=24+28)
RegexpMatchEasy1_32-4 72.2MB/s ± 0% 72.2MB/s ± 0% ~ (p=0.064 n=27+29)
RegexpMatchEasy1_1K-4 396MB/s ± 1% 396MB/s ± 1% ~ (p=0.583 n=27+25)
RegexpMatchMedium_32-4 1.35MB/s ± 0% 1.35MB/s ± 0% ~ (all equal)
RegexpMatchMedium_1K-4 4.60MB/s ± 0% 4.59MB/s ± 0% -0.14% (p=0.000 n=30+26)
RegexpMatchHard_32-4 2.61MB/s ± 0% 2.61MB/s ± 0% ~ (all equal)
RegexpMatchHard_1K-4 2.76MB/s ± 0% 2.76MB/s ± 0% ~ (all equal)
Revcomp-4 89.1MB/s ± 0% 89.1MB/s ± 0% ~ (p=0.059 n=28+25)
Template-4 4.75MB/s ± 1% 4.75MB/s ± 1% ~ (p=0.106 n=29+29)
[Geo mean] 18.3MB/s 18.3MB/s -0.07%
Change-Id: I3cd76ce63e84b0c3cebabf9fa3573b76a7343899
Reviewed-on: https://go-review.googlesource.com/124935
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-09-05 02:51:28 +00:00
Ben Shi
2b69ad0b3c
cmd/compile: optimize arm's comparison
...
The CMP/CMN/TST/TEQ perform similar to SUB/ADD/AND/XOR except
the result is abondoned, and only NZCV flags are affected.
This CL implements further optimization with them.
1. A micro benchmark test gets more than 9% improvment.
TSTTEQ-4 6.99ms ± 0% 6.35ms ± 0% -9.15% (p=0.000 n=33+36)
(https://github.com/benshi001/ugo1/blob/master/tstteq2_test.go )
2. The go1 benckmark shows no regression, excluding noise.
name old time/op new time/op delta
BinaryTree17-4 25.7s ± 1% 25.7s ± 1% ~ (p=0.830 n=40+40)
Fannkuch11-4 13.3s ± 0% 13.2s ± 0% -0.65% (p=0.000 n=40+34)
FmtFprintfEmpty-4 394ns ± 0% 394ns ± 0% ~ (p=0.819 n=40+40)
FmtFprintfString-4 677ns ± 0% 677ns ± 0% +0.06% (p=0.039 n=39+40)
FmtFprintfInt-4 707ns ± 0% 706ns ± 0% -0.14% (p=0.000 n=40+39)
FmtFprintfIntInt-4 1.04µs ± 0% 1.04µs ± 0% +0.10% (p=0.000 n=29+31)
FmtFprintfPrefixedInt-4 1.10µs ± 0% 1.11µs ± 0% +0.65% (p=0.000 n=39+37)
FmtFprintfFloat-4 2.27µs ± 0% 2.26µs ± 0% -0.53% (p=0.000 n=39+40)
FmtManyArgs-4 3.96µs ± 0% 3.96µs ± 0% +0.10% (p=0.000 n=39+40)
GobDecode-4 53.4ms ± 1% 52.8ms ± 2% -1.10% (p=0.000 n=39+39)
GobEncode-4 50.3ms ± 3% 50.4ms ± 2% ~ (p=0.089 n=40+39)
Gzip-4 2.62s ± 0% 2.64s ± 0% +0.60% (p=0.000 n=40+39)
Gunzip-4 312ms ± 0% 312ms ± 0% +0.02% (p=0.030 n=40+39)
HTTPClientServer-4 1.01ms ± 7% 0.98ms ± 7% -2.37% (p=0.000 n=40+39)
JSONEncode-4 126ms ± 1% 126ms ± 1% -0.38% (p=0.004 n=39+39)
JSONDecode-4 423ms ± 0% 426ms ± 2% +0.72% (p=0.001 n=39+40)
Mandelbrot200-4 18.4ms ± 0% 18.4ms ± 0% +0.04% (p=0.000 n=38+40)
GoParse-4 22.8ms ± 0% 22.6ms ± 0% -0.68% (p=0.000 n=35+40)
RegexpMatchEasy0_32-4 699ns ± 0% 704ns ± 0% +0.73% (p=0.000 n=27+40)
RegexpMatchEasy0_1K-4 4.27µs ± 0% 4.26µs ± 0% -0.09% (p=0.000 n=35+38)
RegexpMatchEasy1_32-4 741ns ± 0% 735ns ± 0% -0.85% (p=0.000 n=40+35)
RegexpMatchEasy1_1K-4 5.53µs ± 0% 5.49µs ± 0% -0.69% (p=0.000 n=39+40)
RegexpMatchMedium_32-4 1.07µs ± 0% 1.04µs ± 2% -2.34% (p=0.000 n=40+40)
RegexpMatchMedium_1K-4 261µs ± 0% 261µs ± 0% -0.16% (p=0.000 n=40+39)
RegexpMatchHard_32-4 14.9µs ± 0% 14.9µs ± 0% -0.18% (p=0.000 n=39+40)
RegexpMatchHard_1K-4 445µs ± 0% 446µs ± 0% +0.09% (p=0.000 n=36+34)
Revcomp-4 41.8ms ± 1% 41.8ms ± 1% ~ (p=0.595 n=39+38)
Template-4 530ms ± 1% 528ms ± 1% -0.49% (p=0.000 n=40+40)
TimeParse-4 3.39µs ± 0% 3.42µs ± 0% +0.98% (p=0.000 n=36+38)
TimeFormat-4 6.12µs ± 0% 6.07µs ± 0% -0.81% (p=0.000 n=34+38)
[Geo mean] 384µs 383µs -0.24%
name old speed new speed delta
GobDecode-4 14.4MB/s ± 1% 14.5MB/s ± 2% +1.11% (p=0.000 n=39+39)
GobEncode-4 15.3MB/s ± 3% 15.2MB/s ± 2% ~ (p=0.104 n=40+39)
Gzip-4 7.40MB/s ± 1% 7.36MB/s ± 0% -0.60% (p=0.000 n=40+39)
Gunzip-4 62.2MB/s ± 0% 62.1MB/s ± 0% -0.02% (p=0.047 n=40+39)
JSONEncode-4 15.4MB/s ± 1% 15.4MB/s ± 2% +0.39% (p=0.002 n=39+39)
JSONDecode-4 4.59MB/s ± 0% 4.56MB/s ± 2% -0.71% (p=0.000 n=39+40)
GoParse-4 2.54MB/s ± 0% 2.56MB/s ± 0% +0.72% (p=0.000 n=26+40)
RegexpMatchEasy0_32-4 45.8MB/s ± 0% 45.4MB/s ± 0% -0.75% (p=0.000 n=38+40)
RegexpMatchEasy0_1K-4 240MB/s ± 0% 240MB/s ± 0% +0.09% (p=0.000 n=35+38)
RegexpMatchEasy1_32-4 43.1MB/s ± 0% 43.5MB/s ± 0% +0.84% (p=0.000 n=40+39)
RegexpMatchEasy1_1K-4 185MB/s ± 0% 186MB/s ± 0% +0.69% (p=0.000 n=39+40)
RegexpMatchMedium_32-4 936kB/s ± 1% 959kB/s ± 2% +2.38% (p=0.000 n=40+40)
RegexpMatchMedium_1K-4 3.92MB/s ± 0% 3.93MB/s ± 0% +0.18% (p=0.000 n=39+40)
RegexpMatchHard_32-4 2.15MB/s ± 0% 2.15MB/s ± 0% +0.19% (p=0.000 n=40+40)
RegexpMatchHard_1K-4 2.30MB/s ± 0% 2.30MB/s ± 0% ~ (all equal)
Revcomp-4 60.8MB/s ± 1% 60.8MB/s ± 1% ~ (p=0.600 n=39+38)
Template-4 3.66MB/s ± 1% 3.68MB/s ± 1% +0.46% (p=0.000 n=40+40)
[Geo mean] 12.8MB/s 12.8MB/s +0.27%
Change-Id: I849161169ecf0876a04b7c1d3990fa8d1435215e
Reviewed-on: https://go-review.googlesource.com/122855
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2018-08-27 15:49:46 +00:00
Ben Shi
556971316f
cmd/compile: optimize 386's comparison
...
CMPL/CMPW/CMPB can take a memory operand on 386, and this CL
implements that optimization.
1. The total size of pkg/linux_386 decreases about 45KB, excluding
cmd/compile.
2. The go1 benchmark shows a little improvement.
name old time/op new time/op delta
BinaryTree17-4 3.36s ± 2% 3.37s ± 3% ~ (p=0.537 n=40+40)
Fannkuch11-4 3.59s ± 1% 3.53s ± 2% -1.58% (p=0.000 n=40+40)
FmtFprintfEmpty-4 46.0ns ± 3% 45.8ns ± 3% ~ (p=0.249 n=40+40)
FmtFprintfString-4 80.0ns ± 4% 78.8ns ± 3% -1.49% (p=0.001 n=40+40)
FmtFprintfInt-4 89.7ns ± 2% 90.3ns ± 2% +0.74% (p=0.003 n=40+40)
FmtFprintfIntInt-4 144ns ± 3% 143ns ± 3% -0.95% (p=0.003 n=40+40)
FmtFprintfPrefixedInt-4 181ns ± 4% 180ns ± 2% ~ (p=0.103 n=40+40)
FmtFprintfFloat-4 412ns ± 3% 408ns ± 4% -0.97% (p=0.018 n=40+40)
FmtManyArgs-4 607ns ± 4% 605ns ± 4% ~ (p=0.148 n=40+40)
GobDecode-4 7.19ms ± 4% 7.24ms ± 5% ~ (p=0.340 n=40+40)
GobEncode-4 7.04ms ± 9% 6.99ms ± 9% ~ (p=0.289 n=40+40)
Gzip-4 400ms ± 6% 398ms ± 5% ~ (p=0.168 n=40+40)
Gunzip-4 41.2ms ± 3% 41.7ms ± 3% +1.40% (p=0.001 n=40+40)
HTTPClientServer-4 62.5µs ± 1% 62.1µs ± 2% -0.61% (p=0.000 n=37+37)
JSONEncode-4 20.7ms ± 4% 20.4ms ± 3% -1.60% (p=0.000 n=40+40)
JSONDecode-4 69.4ms ± 4% 69.2ms ± 6% ~ (p=0.177 n=40+40)
Mandelbrot200-4 5.22ms ± 6% 5.21ms ± 3% ~ (p=0.531 n=40+40)
GoParse-4 3.29ms ± 3% 3.28ms ± 3% ~ (p=0.321 n=40+39)
RegexpMatchEasy0_32-4 104ns ± 4% 103ns ± 7% -0.89% (p=0.040 n=40+40)
RegexpMatchEasy0_1K-4 852ns ± 3% 853ns ± 2% ~ (p=0.357 n=40+40)
RegexpMatchEasy1_32-4 113ns ± 8% 113ns ± 3% ~ (p=0.906 n=40+40)
RegexpMatchEasy1_1K-4 1.03µs ± 4% 1.03µs ± 5% ~ (p=0.326 n=40+40)
RegexpMatchMedium_32-4 136ns ± 3% 133ns ± 3% -2.31% (p=0.000 n=40+40)
RegexpMatchMedium_1K-4 44.0µs ± 3% 43.7µs ± 3% ~ (p=0.053 n=40+40)
RegexpMatchHard_32-4 2.27µs ± 3% 2.26µs ± 4% ~ (p=0.391 n=40+40)
RegexpMatchHard_1K-4 68.0µs ± 3% 68.9µs ± 3% +1.28% (p=0.000 n=40+40)
Revcomp-4 1.86s ± 5% 1.86s ± 2% ~ (p=0.950 n=40+40)
Template-4 73.4ms ± 4% 69.9ms ± 7% -4.78% (p=0.000 n=40+40)
TimeParse-4 449ns ± 4% 441ns ± 5% -1.76% (p=0.000 n=40+40)
TimeFormat-4 416ns ± 3% 417ns ± 4% ~ (p=0.304 n=40+40)
[Geo mean] 67.7µs 67.3µs -0.55%
name old speed new speed delta
GobDecode-4 107MB/s ± 4% 106MB/s ± 5% ~ (p=0.336 n=40+40)
GobEncode-4 109MB/s ± 5% 110MB/s ± 9% ~ (p=0.142 n=38+40)
Gzip-4 48.5MB/s ± 5% 48.8MB/s ± 5% ~ (p=0.172 n=40+40)
Gunzip-4 472MB/s ± 3% 465MB/s ± 3% -1.39% (p=0.001 n=40+40)
JSONEncode-4 93.6MB/s ± 4% 95.1MB/s ± 3% +1.61% (p=0.000 n=40+40)
JSONDecode-4 28.0MB/s ± 3% 28.1MB/s ± 6% ~ (p=0.181 n=40+40)
GoParse-4 17.6MB/s ± 3% 17.7MB/s ± 3% ~ (p=0.350 n=40+39)
RegexpMatchEasy0_32-4 308MB/s ± 4% 311MB/s ± 6% +0.96% (p=0.025 n=40+40)
RegexpMatchEasy0_1K-4 1.20GB/s ± 3% 1.20GB/s ± 2% ~ (p=0.317 n=40+40)
RegexpMatchEasy1_32-4 282MB/s ± 7% 282MB/s ± 3% ~ (p=0.516 n=40+40)
RegexpMatchEasy1_1K-4 994MB/s ± 4% 991MB/s ± 5% ~ (p=0.319 n=40+40)
RegexpMatchMedium_32-4 7.31MB/s ± 3% 7.49MB/s ± 3% +2.46% (p=0.000 n=40+40)
RegexpMatchMedium_1K-4 23.3MB/s ± 3% 23.4MB/s ± 3% ~ (p=0.052 n=40+40)
RegexpMatchHard_32-4 14.1MB/s ± 3% 14.1MB/s ± 4% ~ (p=0.391 n=40+40)
RegexpMatchHard_1K-4 15.1MB/s ± 3% 14.9MB/s ± 3% -1.27% (p=0.000 n=40+40)
Revcomp-4 137MB/s ± 5% 137MB/s ± 2% ~ (p=0.942 n=40+40)
Template-4 26.5MB/s ± 4% 27.8MB/s ± 7% +5.03% (p=0.000 n=40+40)
[Geo mean] 78.6MB/s 79.0MB/s +0.57%
Change-Id: Idcacc6881ef57cd7dc33aa87b711282842b72a53
Reviewed-on: https://go-review.googlesource.com/126618
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-08-20 14:23:22 +00:00
Michael Munday
b65122f99a
cmd/compile: optimize comparisons using load merging where available
...
Multi-byte comparison operations were used on amd64, arm64, i386
and s390x for comparisons with constant arrays, but only amd64 and
i386 for comparisons with string constants. This CL combines the
check for platform capability, since they have the same requirements,
and also enables both on ppc64le which also supports load merging.
Note that these optimizations currently use little endian byte order
which results in byte reversal instructions on s390x. This should
be fixed at some point.
Change-Id: Ie612d13359b50c77f4d7c6e73fea4a59fa11f322
Reviewed-on: https://go-review.googlesource.com/102558
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-04-09 21:16:47 +00:00
Alberto Donizetti
a27cd4fd31
test/codegen: port tbz/tbnz arm64 tests
...
And delete them from asm_test.
Change-Id: I34fcf85ae8ce09cd146fe4ce6a0ae7616bd97e2d
Reviewed-on: https://go-review.googlesource.com/102296
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Giovanni Bajo <rasky@develer.com>
2018-03-24 09:35:53 +00:00
Alberto Donizetti
fc6280d4b0
test/codegen: port direct comparisons with memory tests
...
And remove them from asm_test.
Change-Id: I1ca29b40546d6de06f20bfd550ed8ff87f495454
Reviewed-on: https://go-review.googlesource.com/102115
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
2018-03-22 17:20:09 +00:00
Alberto Donizetti
be371edd67
test/codegen: port comparisons tests to codegen
...
And delete them from asm_test.
Change-Id: I64c512bfef3b3da6db5c5d29277675dade28b8ab
Reviewed-on: https://go-review.googlesource.com/101595
Run-TryBot: Alberto Donizetti <alb.donizetti@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Giovanni Bajo <rasky@develer.com>
2018-03-20 19:38:06 +00:00