mirror of
https://github.com/golang/go.git
synced 2025-10-28 15:24:13 +00:00
ADD(Q|L) generally has twice the throughput of SHL(Q|L) by 1.
Came up in CL 626998.
Reciprocal throughput (cycles per instruction, lower is better) by arch:
Zen 4:
SHLL (R64, 1): 0.5
ADD (R64, R64): 0.25
Intel Alder Lake:
SHLL (R64, 1): 0.5
ADD (R64, R64): 0.2
Intel Haswell:
SHLL (R64, 1): 0.5
ADD (R64, R64): 0.25
Also include a minor optimization for:
(x + x) << c -> x << (c + 1)
Before this, the code:
func addShift(x int64) int64 {
return (x + x) << 1
}
emitted two instructions:
ADDQ AX, AX
SHLQ $1, AX
but we can do it in a single shift:
SHLQ $2, AX
Add a codegen test for clearing the last bit.
compilecmp linux/amd64:
math
math.sqrt 243 -> 242 (-0.41%)
math [cmd/compile]
math.sqrt 243 -> 242 (-0.41%)
runtime
runtime.selectgo 5455 -> 5445 (-0.18%)
runtime.sysargs 665 -> 662 (-0.45%)
runtime.isPinned 145 -> 141 (-2.76%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.setPinned 714 -> 709 (-0.70%)
runtime [cmd/compile]
runtime.sysargs 665 -> 662 (-0.45%)
runtime.setPinned 714 -> 709 (-0.70%)
runtime.atoi64 198 -> 194 (-2.02%)
runtime.isPinned 145 -> 141 (-2.76%)
strconv
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)
strconv [cmd/compile]
strconv.FormatInt 201 -> 197 (-1.99%)
strconv.ryuFtoaShortest 1298 -> 1266 (-2.47%)
strconv.ryuDigits32 490 -> 488 (-0.41%)
strconv.AppendUint 342 -> 340 (-0.58%)
strconv.computeBounds 109 -> 107 (-1.83%)
strconv.small 144 -> 134 (-6.94%)
strconv.AppendInt 357 -> 344 (-3.64%)
image
image.Rectangle.Inset 101 -> 97 (-3.96%)
regexp/syntax
regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%)
regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%)
regexp/syntax.ranges.Less 153 -> 150 (-1.96%)
regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%)
time
time.Time.Before 179 -> 161 (-10.06%)
time.Time.Compare 189 -> 166 (-12.17%)
time.Time.Sub 444 -> 425 (-4.28%)
time.Time.UnixMicro 106 -> 95 (-10.38%)
time.div 592 -> 587 (-0.84%)
time.Time.UnixNano 85 -> 78 (-8.24%)
time.(*Time).UnixMilli 141 -> 140 (-0.71%)
time.Time.UnixMilli 106 -> 95 (-10.38%)
time.(*Time).UnixMicro 141 -> 140 (-0.71%)
time.Time.After 179 -> 161 (-10.06%)
time.Time.Equal 170 -> 150 (-11.76%)
time.Time.AppendBinary 766 -> 757 (-1.17%)
time.Time.IsZero 74 -> 66 (-10.81%)
time.(*Time).UnixNano 124 -> 113 (-8.87%)
time.(*Time).IsZero 113 -> 108 (-4.42%)
regexp
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%)
regexp.QuoteMeta 485 -> 469 (-3.30%)
regexp/syntax [cmd/compile]
regexp/syntax.inCharClass.func1 111 -> 110 (-0.90%)
regexp/syntax.(*compiler).loop 583 -> 568 (-2.57%)
regexp/syntax.(*compiler).quest 586 -> 573 (-2.22%)
regexp/syntax.ranges.Less 153 -> 150 (-1.96%)
encoding/base64
encoding/base64.decodedLen 92 -> 90 (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%)
time [cmd/compile]
time.(*Time).IsZero 113 -> 108 (-4.42%)
time.Time.IsZero 74 -> 66 (-10.81%)
time.(*Time).UnixNano 124 -> 113 (-8.87%)
time.Time.UnixMilli 106 -> 95 (-10.38%)
time.Time.Equal 170 -> 150 (-11.76%)
time.Time.UnixMicro 106 -> 95 (-10.38%)
time.(*Time).UnixMicro 141 -> 140 (-0.71%)
time.Time.Before 179 -> 161 (-10.06%)
time.Time.UnixNano 85 -> 78 (-8.24%)
time.Time.AppendBinary 766 -> 757 (-1.17%)
time.div 592 -> 587 (-0.84%)
time.Time.After 179 -> 161 (-10.06%)
time.Time.Compare 189 -> 166 (-12.17%)
time.(*Time).UnixMilli 141 -> 140 (-0.71%)
time.Time.Sub 444 -> 425 (-4.28%)
index/suffixarray
index/suffixarray.sais_8_32 1677 -> 1645 (-1.91%)
index/suffixarray.sais_32 1677 -> 1645 (-1.91%)
index/suffixarray.sais_64 1677 -> 1654 (-1.37%)
index/suffixarray.sais_8_64 1677 -> 1654 (-1.37%)
index/suffixarray.writeInt 249 -> 247 (-0.80%)
os
os.Expand 1070 -> 1051 (-1.78%)
os.Chtimes 787 -> 774 (-1.65%)
regexp [cmd/compile]
regexp.(*Regexp).FindAllStringSubmatch.func1 590 -> 569 (-3.56%)
regexp.QuoteMeta 485 -> 469 (-3.30%)
encoding/base64 [cmd/compile]
encoding/base64.decodedLen 92 -> 90 (-2.17%)
encoding/base64.(*Encoding).DecodedLen 99 -> 97 (-2.02%)
encoding/hex
encoding/hex.Encode 138 -> 136 (-1.45%)
encoding/hex.(*decoder).Read 830 -> 824 (-0.72%)
crypto/des
crypto/des.initFeistelBox 235 -> 229 (-2.55%)
crypto/des.cryptBlock 549 -> 538 (-2.00%)
os [cmd/compile]
os.Chtimes 787 -> 774 (-1.65%)
os.Expand 1070 -> 1051 (-1.78%)
math/big
math/big.newFloat 238 -> 223 (-6.30%)
math/big.nat.mul 2138 -> 2122 (-0.75%)
math/big.karatsubaSqr 1372 -> 1369 (-0.22%)
math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%)
math/big.basicSqr 1032 -> 1017 (-1.45%)
cmd/vendor/golang.org/x/sys/unix
cmd/vendor/golang.org/x/sys/unix.TimeToTimespec 72 -> 66 (-8.33%)
encoding/json
encoding/json.Indent 404 -> 403 (-0.25%)
encoding/json.MarshalIndent 303 -> 297 (-1.98%)
testing
testing.(*T).Deadline 84 -> 82 (-2.38%)
testing.(*M).Run 3545 -> 3525 (-0.56%)
archive/zip
archive/zip.headerFileInfo.ModTime 229 -> 223 (-2.62%)
encoding/gob
encoding/gob.(*encoderState).encodeInt 474 -> 469 (-1.05%)
crypto/elliptic
crypto/elliptic.Marshal 728 -> 714 (-1.92%)
debug/buildinfo
debug/buildinfo.readString 325 -> 315 (-3.08%)
image/png
image/png.(*decoder).readImagePass 10866 -> 10834 (-0.29%)
archive/tar
archive/tar.Header.allowedFormats.func3 1768 -> 1736 (-1.81%)
archive/tar.formatPAXTime 389 -> 358 (-7.97%)
archive/tar.(*Writer).writeGNUHeader 741 -> 727 (-1.89%)
archive/tar.readGNUSparseMap0x1 709 -> 695 (-1.97%)
archive/tar.(*Writer).templateV7Plus 915 -> 909 (-0.66%)
crypto/internal/cryptotest
crypto/internal/cryptotest.TestHash.func4 890 -> 879 (-1.24%)
crypto/internal/cryptotest.TestStream.func6.1 646 -> 645 (-0.15%)
crypto/internal/cryptotest.testCipher.func3 1300 -> 1289 (-0.85%)
internal/pkgbits
internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%)
testing/quick
testing/quick.(*Config).getRand 316 -> 315 (-0.32%)
log/slog
log/slog.TimeValue 489 -> 479 (-2.04%)
runtime/pprof
runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%)
internal/coverage/cfile
internal/coverage/cfile.(*emitState).openMetaFile 824 -> 822 (-0.24%)
internal/coverage/cfile.(*emitState).openCounterFile 904 -> 892 (-1.33%)
cmd/internal/objabi
cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%)
crypto/ecdsa
crypto/ecdsa.pointFromAffine 1162 -> 1144 (-1.55%)
net
net.minNonzeroTime 313 -> 308 (-1.60%)
net.cgoLookupAddrPTR 812 -> 797 (-1.85%)
net.(*IPNet).String 851 -> 827 (-2.82%)
net.IP.AppendText 488 -> 471 (-3.48%)
net.IPMask.String 281 -> 270 (-3.91%)
net.partialDeadline 374 -> 366 (-2.14%)
net.hexString 249 -> 240 (-3.61%)
net.IP.String 454 -> 453 (-0.22%)
internal/fuzz
internal/fuzz.newPcgRand 240 -> 234 (-2.50%)
crypto/x509
crypto/x509.(*Certificate).isValid 2642 -> 2611 (-1.17%)
cmd/internal/obj/s390x
cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%)
encoding/hex [cmd/compile]
encoding/hex.(*decoder).Read 830 -> 824 (-0.72%)
encoding/hex.Encode 138 -> 136 (-1.45%)
cmd/internal/objabi [cmd/compile]
cmd/internal/objabi.expandArgs 1177 -> 1169 (-0.68%)
math/big [cmd/compile]
math/big.(*Float).sqrtInverse 895 -> 878 (-1.90%)
math/big.nat.mul 2138 -> 2122 (-0.75%)
math/big.karatsubaSqr 1372 -> 1369 (-0.22%)
math/big.basicSqr 1032 -> 1017 (-1.45%)
math/big.newFloat 238 -> 223 (-6.30%)
encoding/json [cmd/compile]
encoding/json.MarshalIndent 303 -> 297 (-1.98%)
encoding/json.Indent 404 -> 403 (-0.25%)
cmd/covdata
main.(*metaMerge).emitCounters 985 -> 973 (-1.22%)
runtime/pprof [cmd/compile]
runtime/pprof.(*profileBuilder).build 2341 -> 2322 (-0.81%)
cmd/compile/internal/syntax
cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%)
cmd/dist
main.runInstall 19081 -> 19049 (-0.17%)
crypto/tls
crypto/tls.extractPadding 176 -> 175 (-0.57%)
slices.Clone[[]crypto/tls.SignatureScheme,crypto/tls.SignatureScheme] 253 -> 247 (-2.37%)
slices.Clone[[]uint16,uint16] 253 -> 247 (-2.37%)
slices.Clone[[]crypto/tls.CurveID,crypto/tls.CurveID] 253 -> 247 (-2.37%)
crypto/tls.(*Config).cipherSuites 335 -> 326 (-2.69%)
slices.DeleteFunc[go.shape.[]crypto/tls.CurveID,go.shape.uint16] 437 -> 434 (-0.69%)
crypto/tls.dial 1349 -> 1339 (-0.74%)
slices.DeleteFunc[go.shape.[]uint16,go.shape.uint16] 437 -> 434 (-0.69%)
internal/pkgbits [cmd/compile]
internal/pkgbits.(*Encoder).Int64 113 -> 103 (-8.85%)
internal/pkgbits.(*Encoder).rawVarint 74 -> 72 (-2.70%)
cmd/compile/internal/syntax [cmd/compile]
cmd/compile/internal/syntax.(*source).fill 722 -> 703 (-2.63%)
cmd/internal/obj/s390x [cmd/compile]
cmd/internal/obj/s390x.buildop 33676 -> 33644 (-0.10%)
cmd/go/internal/trace
cmd/go/internal/trace.Flow 910 -> 886 (-2.64%)
cmd/go/internal/trace.(*Span).Done 311 -> 304 (-2.25%)
cmd/go/internal/trace.StartSpan 620 -> 615 (-0.81%)
cmd/internal/script
cmd/internal/script.(*Engine).Execute.func2 534 -> 528 (-1.12%)
cmd/link/internal/loader
cmd/link/internal/loader.(*Loader).SetSymSect 344 -> 338 (-1.74%)
net/http
net/http.(*Transport).queueForIdleConn 1797 -> 1766 (-1.73%)
net/http.(*Transport).getConn 2149 -> 2131 (-0.84%)
net/http.(*http2ClientConn).tooIdleLocked 207 -> 197 (-4.83%)
net/http.(*http2responseWriter).SetWriteDeadline.func1 520 -> 508 (-2.31%)
net/http.(*Cookie).Valid 837 -> 818 (-2.27%)
net/http.(*http2responseWriter).SetReadDeadline 373 -> 357 (-4.29%)
net/http.checkIfRange 701 -> 690 (-1.57%)
net/http.(*http2SettingsFrame).Value 325 -> 298 (-8.31%)
net/http.(*http2SettingsFrame).HasDuplicates 777 -> 767 (-1.29%)
net/http.(*Server).Serve 1746 -> 1739 (-0.40%)
net/http.http2traceGotConn 569 -> 556 (-2.28%)
net/http/pprof
net/http/pprof.collectProfile 242 -> 239 (-1.24%)
cmd/compile/internal/coverage
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%)
cmd/vendor/golang.org/x/telemetry/internal/upload
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).findWork 4570 -> 4540 (-0.66%)
cmd/vendor/golang.org/x/telemetry/internal/upload.(*uploader).reports 3604 -> 3572 (-0.89%)
cmd/compile/internal/coverage [cmd/compile]
cmd/compile/internal/coverage.metaHashAndLen 439 -> 438 (-0.23%)
cmd/vendor/golang.org/x/text/language
cmd/vendor/golang.org/x/text/language.regionGroupDist 287 -> 284 (-1.05%)
cmd/go/internal/vcweb
cmd/go/internal/vcweb.(*Server).overview.func1 1045 -> 1041 (-0.38%)
cmd/go/internal/vcs
cmd/go/internal/vcs.expand 761 -> 741 (-2.63%)
cmd/compile/internal/inline/inlheur
slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%)
cmd/compile/internal/inline/inlheur [cmd/compile]
slices.stableCmpFunc[go.shape.struct 2300 -> 2284 (-0.70%)
cmd/go/internal/modfetch/codehost
cmd/go/internal/modfetch/codehost.bzrParseStat 2217 -> 2213 (-0.18%)
cmd/link/internal/ld
cmd/link/internal/ld.decodetypeStructFieldCount 157 -> 152 (-3.18%)
cmd/link/internal/ld.(*Link).address 12559 -> 12495 (-0.51%)
cmd/link/internal/ld.(*dodataState).allocateDataSections 18345 -> 18205 (-0.76%)
cmd/link/internal/ld.elfshreloc 618 -> 616 (-0.32%)
cmd/link/internal/ld.(*deadcodePass).decodetypeMethods 794 -> 779 (-1.89%)
cmd/link/internal/ld.(*dodataState).assignDsymsToSection 668 -> 663 (-0.75%)
cmd/link/internal/ld.relocSectFn 285 -> 284 (-0.35%)
cmd/link/internal/ld.decodetypeIfaceMethodCount 146 -> 144 (-1.37%)
cmd/link/internal/ld.decodetypeArrayLen 157 -> 152 (-3.18%)
cmd/link/internal/arm64
cmd/link/internal/arm64.gensymlate.func1 895 -> 888 (-0.78%)
cmd/go/internal/modload
cmd/go/internal/modload.queryProxy.func3 1029 -> 1012 (-1.65%)
cmd/go/internal/load
cmd/go/internal/load.(*Package).setBuildInfo 8453 -> 8447 (-0.07%)
cmd/go/internal/clean
cmd/go/internal/clean.runClean 2120 -> 2104 (-0.75%)
cmd/compile/internal/ssa
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%)
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%)
cmd/compile/internal/ssa.(*debugState).buildLocationLists 3326 -> 3294 (-0.96%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.(*debugState).processValue 9756 -> 9724 (-0.33%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%)
cmd/compile/internal/ssa [cmd/compile]
cmd/compile/internal/ssa.rewriteValueARM64_OpARM64MOVHstoreidx2 730 -> 719 (-1.51%)
cmd/compile/internal/ssa.(*poset).aliasnodes 2010 -> 1978 (-1.59%)
cmd/compile/internal/ssa.(*poset).mergeroot 1079 -> 1054 (-2.32%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDQconst 3069 -> 2941 (-4.17%)
cmd/compile/internal/ssa.rewriteValueAMD64_OpAMD64ADDLconst 3069 -> 2941 (-4.17%)
file before after Δ %
math/bits.s 2352 2354 +2 +0.085%
math/bits [cmd/compile].s 2352 2354 +2 +0.085%
math.s 35675 35674 -1 -0.003%
math [cmd/compile].s 35675 35674 -1 -0.003%
runtime.s 577251 577245 -6 -0.001%
runtime [cmd/compile].s 642419 642438 +19 +0.003%
sort.s 37434 37435 +1 +0.003%
strconv.s 48391 48343 -48 -0.099%
sort [cmd/compile].s 37434 37435 +1 +0.003%
bufio.s 21386 21418 +32 +0.150%
strconv [cmd/compile].s 48391 48343 -48 -0.099%
image.s 34978 35022 +44 +0.126%
regexp/syntax.s 81719 81781 +62 +0.076%
time.s 94341 94184 -157 -0.166%
regexp.s 60411 60399 -12 -0.020%
bufio [cmd/compile].s 21512 21544 +32 +0.149%
encoding/binary.s 34062 34087 +25 +0.073%
regexp/syntax [cmd/compile].s 81719 81781 +62 +0.076%
encoding/base64.s 11907 11903 -4 -0.034%
time [cmd/compile].s 94341 94184 -157 -0.166%
index/suffixarray.s 41633 41527 -106 -0.255%
os.s 101770 101738 -32 -0.031%
regexp [cmd/compile].s 60411 60399 -12 -0.020%
encoding/binary [cmd/compile].s 37173 37198 +25 +0.067%
encoding/base64 [cmd/compile].s 11907 11903 -4 -0.034%
os/exec.s 23900 23907 +7 +0.029%
encoding/hex.s 6038 6030 -8 -0.132%
crypto/des.s 5073 5056 -17 -0.335%
os [cmd/compile].s 102030 101998 -32 -0.031%
vendor/golang.org/x/net/http2/hpack.s 22027 22033 +6 +0.027%
math/big.s 164808 164753 -55 -0.033%
cmd/vendor/golang.org/x/sys/unix.s 121450 121444 -6 -0.005%
encoding/json.s 110294 110287 -7 -0.006%
testing.s 115303 115281 -22 -0.019%
archive/zip.s 65329 65325 -4 -0.006%
os/user.s 10078 10080 +2 +0.020%
encoding/gob.s 143788 143783 -5 -0.003%
crypto/elliptic.s 30686 30704 +18 +0.059%
go/doc/comment.s 49401 49433 +32 +0.065%
debug/buildinfo.s 9095 9085 -10 -0.110%
image/png.s 36113 36081 -32 -0.089%
archive/tar.s 71994 71897 -97 -0.135%
crypto/internal/cryptotest.s 60872 60849 -23 -0.038%
internal/pkgbits.s 20441 20429 -12 -0.059%
testing/quick.s 8236 8235 -1 -0.012%
log/slog.s 77568 77558 -10 -0.013%
internal/trace/internal/oldtrace.s 52885 52896 +11 +0.021%
runtime/pprof.s 123978 123969 -9 -0.007%
internal/coverage/cfile.s 25198 25184 -14 -0.056%
cmd/internal/objabi.s 19954 19946 -8 -0.040%
crypto/ecdsa.s 29159 29141 -18 -0.062%
log/slog/internal/benchmarks.s 6694 6695 +1 +0.015%
net.s 299569 299503 -66 -0.022%
os/exec [cmd/compile].s 23888 23895 +7 +0.029%
internal/trace.s 179226 179240 +14 +0.008%
internal/fuzz.s 86190 86191 +1 +0.001%
crypto/x509.s 177195 177164 -31 -0.017%
cmd/internal/obj/s390x.s 121642 121610 -32 -0.026%
cmd/internal/obj/ppc64.s 140118 140122 +4 +0.003%
encoding/hex [cmd/compile].s 6149 6141 -8 -0.130%
cmd/internal/objabi [cmd/compile].s 19954 19946 -8 -0.040%
cmd/internal/obj/arm64.s 158523 158555 +32 +0.020%
go/doc/comment [cmd/compile].s 49512 49544 +32 +0.065%
math/big [cmd/compile].s 166394 166339 -55 -0.033%
encoding/json [cmd/compile].s 110712 110705 -7 -0.006%
cmd/covdata.s 39699 39687 -12 -0.030%
runtime/pprof [cmd/compile].s 125209 125200 -9 -0.007%
cmd/compile/internal/syntax.s 181755 181736 -19 -0.010%
cmd/dist.s 177893 177861 -32 -0.018%
crypto/tls.s 389157 389113 -44 -0.011%
internal/pkgbits [cmd/compile].s 41644 41632 -12 -0.029%
cmd/compile/internal/syntax [cmd/compile].s 196105 196086 -19 -0.010%
cmd/compile/internal/types.s 71315 71345 +30 +0.042%
cmd/internal/obj/s390x [cmd/compile].s 121733 121701 -32 -0.026%
cmd/go/internal/trace.s 4796 4760 -36 -0.751%
cmd/internal/obj/arm64 [cmd/compile].s 168120 168147 +27 +0.016%
cmd/internal/obj/ppc64 [cmd/compile].s 140219 140223 +4 +0.003%
cmd/internal/script.s 83442 83436 -6 -0.007%
cmd/link/internal/loader.s 93299 93294 -5 -0.005%
net/http.s 620639 620472 -167 -0.027%
net/http/pprof.s 35016 35013 -3 -0.009%
cmd/compile/internal/coverage.s 6668 6667 -1 -0.015%
cmd/vendor/golang.org/x/telemetry/internal/upload.s 34210 34148 -62 -0.181%
cmd/compile/internal/coverage [cmd/compile].s 6664 6663 -1 -0.015%
cmd/vendor/golang.org/x/text/language.s 48077 48074 -3 -0.006%
cmd/go/internal/vcweb.s 45193 45189 -4 -0.009%
cmd/go/internal/vcs.s 44749 44729 -20 -0.045%
cmd/compile/internal/inline/inlheur.s 83758 83742 -16 -0.019%
cmd/compile/internal/inline/inlheur [cmd/compile].s 84773 84757 -16 -0.019%
cmd/go/internal/modfetch/codehost.s 89098 89094 -4 -0.004%
cmd/trace.s 257550 257564 +14 +0.005%
cmd/link/internal/ld.s 641945 641706 -239 -0.037%
cmd/link/internal/arm64.s 34805 34798 -7 -0.020%
cmd/go/internal/modload.s 328971 328954 -17 -0.005%
cmd/go/internal/load.s 178877 178871 -6 -0.003%
cmd/go/internal/clean.s 11006 10990 -16 -0.145%
cmd/compile/internal/ssa.s 3552843 3553347 +504 +0.014%
cmd/compile/internal/ssa [cmd/compile].s 3752511 3753123 +612 +0.016%
total 36179015 36178687 -328 -0.001%
Change-Id: I251c2898ccf3c9931d162d87dabbd49cf4ec73a5
Reviewed-on: https://go-review.googlesource.com/c/go/+/641757
Reviewed-by: Keith Randall <khr@google.com>
Auto-Submit: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Cherry Mui <cherryyz@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
563 lines
13 KiB
Go
563 lines
13 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2018 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package codegen
|
|
|
|
// ------------------ //
|
|
// constant shifts //
|
|
// ------------------ //
|
|
|
|
// lshConst64x64 shifts an int64 left by the constant 33, given as a uint64.
// The checks require SLD on ppc64x and SLLI on riscv64, and forbid AND and
// SLTIU on riscv64.
func lshConst64x64(v int64) int64 {
|
|
// ppc64x:"SLD"
|
|
// riscv64:"SLLI",-"AND",-"SLTIU"
|
|
return v << uint64(33)
|
|
}
|
|
|
|
// rshConst64Ux64 shifts a uint64 right by the constant 33, given as a uint64.
// The checks require SRD on ppc64x and SRLI on riscv64, and forbid AND and
// SLTIU on riscv64.
func rshConst64Ux64(v uint64) uint64 {
|
|
// ppc64x:"SRD"
|
|
// riscv64:"SRLI\t",-"AND",-"SLTIU"
|
|
return v >> uint64(33)
|
|
}
|
|
|
|
// rshConst64Ux64Overflow32 widens a uint32 to uint64 and shifts it right by
// 32, which discards every value bit. The riscv64 check requires a move of
// the constant 0 and forbids any SRL.
func rshConst64Ux64Overflow32(v uint32) uint64 {
|
|
// riscv64:"MOV\t\\$0,",-"SRL"
|
|
return uint64(v) >> 32
|
|
}
|
|
|
|
// rshConst64Ux64Overflow16 widens a uint16 to uint64 and shifts it right by
// 16, which discards every value bit. The riscv64 check requires a move of
// the constant 0 and forbids any SRL.
func rshConst64Ux64Overflow16(v uint16) uint64 {
|
|
// riscv64:"MOV\t\\$0,",-"SRL"
|
|
return uint64(v) >> 16
|
|
}
|
|
|
|
// rshConst64Ux64Overflow8 widens a uint8 to uint64 and shifts it right by 8,
// which discards every value bit. The riscv64 check requires a move of the
// constant 0 and forbids any SRL.
func rshConst64Ux64Overflow8(v uint8) uint64 {
|
|
// riscv64:"MOV\t\\$0,",-"SRL"
|
|
return uint64(v) >> 8
|
|
}
|
|
|
|
// rshConst64x64 shifts an int64 right (arithmetically) by the constant 33,
// given as a uint64. The checks require SRAD on ppc64x and SRAI on riscv64,
// and forbid OR and SLTIU on riscv64.
func rshConst64x64(v int64) int64 {
|
|
// ppc64x:"SRAD"
|
|
// riscv64:"SRAI\t",-"OR",-"SLTIU"
|
|
return v >> uint64(33)
|
|
}
|
|
|
|
// rshConst64x64Overflow32 sign-extends an int32 to int64 and shifts it right
// by 32, leaving only copies of the sign bit. The riscv64 check requires
// SRAIW and forbids SLLI and the 64-bit SRAI.
func rshConst64x64Overflow32(v int32) int64 {
|
|
// riscv64:"SRAIW",-"SLLI",-"SRAI\t"
|
|
return int64(v) >> 32
|
|
}
|
|
|
|
// rshConst64x64Overflow16 sign-extends an int16 to int64 and shifts it right
// by 16, leaving only copies of the sign bit. The riscv64 check requires SLLI
// and SRAI and forbids SRAIW.
func rshConst64x64Overflow16(v int16) int64 {
|
|
// riscv64:"SLLI","SRAI",-"SRAIW"
|
|
return int64(v) >> 16
|
|
}
|
|
|
|
// rshConst64x64Overflow8 sign-extends an int8 to int64 and shifts it right by
// 8, leaving only copies of the sign bit. The riscv64 check requires SLLI and
// SRAI and forbids SRAIW.
func rshConst64x64Overflow8(v int8) int64 {
|
|
// riscv64:"SLLI","SRAI",-"SRAIW"
|
|
return int64(v) >> 8
|
|
}
|
|
|
|
// lshConst32x1 shifts an int32 left by one. The amd64 check requires the
// shift to be emitted as an ADDL and forbids SHLL.
func lshConst32x1(v int32) int32 {
|
|
// amd64:"ADDL", -"SHLL"
|
|
return v << 1
|
|
}
|
|
|
|
// lshConst64x1 shifts an int64 left by one. The amd64 check requires the
// shift to be emitted as an ADDQ and forbids SHLQ.
func lshConst64x1(v int64) int64 {
|
|
// amd64:"ADDQ", -"SHLQ"
|
|
return v << 1
|
|
}
|
|
|
|
// lshConst32x64 shifts an int32 left by the constant 29, given as a uint64.
// The checks require SLW on ppc64x and SLLI on riscv64, and forbid AND, SLTIU
// and MOVW on riscv64.
func lshConst32x64(v int32) int32 {
|
|
// ppc64x:"SLW"
|
|
// riscv64:"SLLI",-"AND",-"SLTIU", -"MOVW"
|
|
return v << uint64(29)
|
|
}
|
|
|
|
// rshConst32Ux64 shifts a uint32 right by the constant 29, given as a uint64.
// The checks require SRW on ppc64x and SRLIW on riscv64, and forbid AND,
// SLTIU and MOVW on riscv64.
func rshConst32Ux64(v uint32) uint32 {
|
|
// ppc64x:"SRW"
|
|
// riscv64:"SRLIW",-"AND",-"SLTIU", -"MOVW"
|
|
return v >> uint64(29)
|
|
}
|
|
|
|
// rshConst32x64 shifts an int32 right (arithmetically) by the constant 29,
// given as a uint64. The checks require SRAW on ppc64x and SRAIW on riscv64,
// and forbid OR, SLTIU and MOVW on riscv64.
func rshConst32x64(v int32) int32 {
|
|
// ppc64x:"SRAW"
|
|
// riscv64:"SRAIW",-"OR",-"SLTIU", -"MOVW"
|
|
return v >> uint64(29)
|
|
}
|
|
|
|
// lshConst64x32 shifts an int64 left by the constant 33, given as a uint32.
// The checks require SLD on ppc64x and SLLI on riscv64, and forbid AND and
// SLTIU on riscv64.
func lshConst64x32(v int64) int64 {
|
|
// ppc64x:"SLD"
|
|
// riscv64:"SLLI",-"AND",-"SLTIU"
|
|
return v << uint32(33)
|
|
}
|
|
|
|
// rshConst64Ux32 shifts a uint64 right by the constant 33, given as a uint32.
// The checks require SRD on ppc64x and SRLI on riscv64, and forbid AND and
// SLTIU on riscv64.
func rshConst64Ux32(v uint64) uint64 {
|
|
// ppc64x:"SRD"
|
|
// riscv64:"SRLI\t",-"AND",-"SLTIU"
|
|
return v >> uint32(33)
|
|
}
|
|
|
|
// rshConst64x32 shifts an int64 right (arithmetically) by the constant 33,
// given as a uint32. The checks require SRAD on ppc64x and SRAI on riscv64,
// and forbid OR and SLTIU on riscv64.
func rshConst64x32(v int64) int64 {
|
|
// ppc64x:"SRAD"
|
|
// riscv64:"SRAI\t",-"OR",-"SLTIU"
|
|
return v >> uint32(33)
|
|
}
|
|
|
|
// lshConst32x1Add doubles an int32 by addition and then shifts it left by
// one. The amd64 check requires the pair to be folded into an SHLL by 2.
func lshConst32x1Add(x int32) int32 {
|
|
// amd64:"SHLL\t[$]2"
|
|
return (x + x) << 1
|
|
}
|
|
|
|
// lshConst64x1Add doubles an int64 by addition and then shifts it left by
// one. The amd64 check requires the pair to be folded into an SHLQ by 2.
func lshConst64x1Add(x int64) int64 {
|
|
// amd64:"SHLQ\t[$]2"
|
|
return (x + x) << 1
|
|
}
|
|
|
|
// lshConst32x2Add doubles an int32 by addition and then shifts it left by
// two. The amd64 check requires the pair to be folded into an SHLL by 3.
func lshConst32x2Add(x int32) int32 {
|
|
// amd64:"SHLL\t[$]3"
|
|
return (x + x) << 2
|
|
}
|
|
|
|
// lshConst64x2Add doubles an int64 by addition and then shifts it left by
// two. The amd64 check requires the pair to be folded into an SHLQ by 3.
func lshConst64x2Add(x int64) int64 {
|
|
// amd64:"SHLQ\t[$]3"
|
|
return (x + x) << 2
|
|
}
|
|
|
|
// ------------------ //
|
|
// masked shifts //
|
|
// ------------------ //
|
|
|
|
// lshMask64x64 shifts an int64 left by a uint64 count masked to 0..63.
// The checks require the plain shift on arm64 (LSL), ppc64x (RLDICL) and
// riscv64 (SLL) and forbid the mask or range-check instructions listed on
// each directive, including on s390x.
func lshMask64x64(v int64, s uint64) int64 {
|
|
// arm64:"LSL",-"AND"
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SLL",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v << (s & 63)
|
|
}
|
|
|
|
// rshMask64Ux64 shifts a uint64 right by a uint64 count masked to 0..63.
// The checks require LSR on arm64, RLDICL on ppc64x and SRL on riscv64, and
// forbid the mask or select instructions listed on each directive.
func rshMask64Ux64(v uint64, s uint64) uint64 {
|
|
// arm64:"LSR",-"AND",-"CSEL"
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SRL\t",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// rshMask64x64 shifts an int64 right (arithmetically) by a uint64 count
// masked to 0..63. The checks require ASR on arm64, RLDICL on ppc64x and SRA
// on riscv64, and forbid the mask or select instructions listed on each
// directive.
func rshMask64x64(v int64, s uint64) int64 {
|
|
// arm64:"ASR",-"AND",-"CSEL"
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SRA\t",-"OR",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// lshMask32x64 shifts an int32 left by a uint64 count masked to 0..63, a
// range wider than the 32-bit operand. Unlike the 64-bit variant, the ppc64x
// check here requires ISEL (while still forbidding ORN); arm64 and riscv64
// must emit the shift without an AND.
func lshMask32x64(v int32, s uint64) int32 {
|
|
// arm64:"LSL",-"AND"
|
|
// ppc64x:"ISEL",-"ORN"
|
|
// riscv64:"SLL",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v << (s & 63)
|
|
}
|
|
|
|
// rshMask32Ux64 shifts a uint32 right by a uint64 count masked to 0..63, a
// range wider than the 32-bit operand. The riscv64 check requires SRLW
// together with SLTIU, NEG and AND and forbids the 64-bit SRL; ppc64x
// requires ISEL; arm64 requires LSR without an AND.
func rshMask32Ux64(v uint32, s uint64) uint32 {
|
|
// arm64:"LSR",-"AND"
|
|
// ppc64x:"ISEL",-"ORN"
|
|
// riscv64:"SRLW","SLTIU","NEG","AND\t",-"SRL\t"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// rsh5Mask32Ux64 shifts a uint32 right by a uint64 count masked to 0..31.
// The riscv64 check requires a bare SRLW and forbids AND, SLTIU and the
// 64-bit SRL.
func rsh5Mask32Ux64(v uint32, s uint64) uint32 {
|
|
// riscv64:"SRLW",-"AND\t",-"SLTIU",-"SRL\t"
|
|
return v >> (s & 31)
|
|
}
|
|
|
|
// rshMask32x64 shifts an int32 right (arithmetically) by a uint64 count
// masked to 0..63, a range wider than the 32-bit operand. The riscv64 check
// requires SRAW together with OR and SLTIU; ppc64x requires ISEL; arm64
// requires ASR without an AND.
func rshMask32x64(v int32, s uint64) int32 {
|
|
// arm64:"ASR",-"AND"
|
|
// ppc64x:"ISEL",-"ORN"
|
|
// riscv64:"SRAW","OR","SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// rsh5Mask32x64 shifts an int32 right (arithmetically) by a uint64 count
// masked to 0..31. The riscv64 check requires a bare SRAW and forbids OR and
// SLTIU.
func rsh5Mask32x64(v int32, s uint64) int32 {
|
|
// riscv64:"SRAW",-"OR",-"SLTIU"
|
|
return v >> (s & 31)
|
|
}
|
|
|
|
// lshMask64x32 shifts an int64 left by a uint32 count masked to 0..63.
// The checks require LSL on arm64, RLDICL on ppc64x and SLL on riscv64, and
// forbid the mask or range-check instructions listed on each directive.
func lshMask64x32(v int64, s uint32) int64 {
|
|
// arm64:"LSL",-"AND"
|
|
// ppc64x:"RLDICL",-"ORN"
|
|
// riscv64:"SLL",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v << (s & 63)
|
|
}
|
|
|
|
// rshMask64Ux32 shifts a uint64 right by a uint32 count masked to 0..63.
// The checks require LSR on arm64, RLDICL on ppc64x and SRL on riscv64, and
// forbid the mask or select instructions listed on each directive.
func rshMask64Ux32(v uint64, s uint32) uint64 {
|
|
// arm64:"LSR",-"AND",-"CSEL"
|
|
// ppc64x:"RLDICL",-"ORN"
|
|
// riscv64:"SRL\t",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// rshMask64x32 shifts an int64 right (arithmetically) by a uint32 count
// masked to 0..63. The checks require ASR on arm64, RLDICL on ppc64x and SRA
// on riscv64, and forbid the mask or select instructions listed on each
// directive.
func rshMask64x32(v int64, s uint32) int64 {
|
|
// arm64:"ASR",-"AND",-"CSEL"
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SRA\t",-"OR",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> (s & 63)
|
|
}
|
|
|
|
// lshMask64x32Ext shifts an int64 left by a signed int32 count that is masked
// to 0..63 and then converted to uint. The checks require RLDICL on ppc64x
// and SLL on riscv64, and forbid the mask or select instructions listed on
// each directive.
func lshMask64x32Ext(v int64, s int32) int64 {
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SLL",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v << uint(s&63)
|
|
}
|
|
|
|
// rshMask64Ux32Ext shifts a uint64 right by a signed int32 count that is
// masked to 0..63 and then converted to uint. The checks require RLDICL on
// ppc64x and SRL on riscv64, and forbid the mask or select instructions
// listed on each directive.
func rshMask64Ux32Ext(v uint64, s int32) uint64 {
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SRL\t",-"AND\t",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> uint(s&63)
|
|
}
|
|
|
|
// rshMask64x32Ext shifts an int64 right (arithmetically) by a signed int32
// count that is masked to 0..63 and then converted to uint. The checks
// require RLDICL on ppc64x and SRA on riscv64, and forbid the mask or select
// instructions listed on each directive.
func rshMask64x32Ext(v int64, s int32) int64 {
|
|
// ppc64x:"RLDICL",-"ORN",-"ISEL"
|
|
// riscv64:"SRA\t",-"OR",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
return v >> uint(s&63)
|
|
}
|
|
|
|
// --------------- //
|
|
// signed shifts //
|
|
// --------------- //
|
|
|
|
// lshSigned shifts x left by an unmasked signed count of each integer width
// (int8, int16, int32, int64) and discards the results.
// We do want to generate a test + panicshift for these cases, so the amd64
// checks require a TEST instruction of the matching operand size.
|
|
func lshSigned(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
|
|
// amd64:"TESTB"
|
|
_ = x << v8
|
|
// amd64:"TESTW"
|
|
_ = x << v16
|
|
// amd64:"TESTL"
|
|
_ = x << v32
|
|
// amd64:"TESTQ"
|
|
_ = x << v64
|
|
}
|
|
|
|
// lshSignedMasked shifts x left by signed counts that are first masked to a
// non-negative range (7, 15, 31 and 63 respectively) and discards the results.
// We want to avoid generating a test + panicshift for these cases, so the
// amd64 checks forbid the TEST instruction of the matching operand size.
|
|
func lshSignedMasked(v8 int8, v16 int16, v32 int32, v64 int64, x int) {
|
|
// amd64:-"TESTB"
|
|
_ = x << (v8 & 7)
|
|
// amd64:-"TESTW"
|
|
_ = x << (v16 & 15)
|
|
// amd64:-"TESTL"
|
|
_ = x << (v32 & 31)
|
|
// amd64:-"TESTQ"
|
|
_ = x << (v64 & 63)
|
|
}
|
|
|
|
// ------------------ //
|
|
// bounded shifts //
|
|
// ------------------ //
|
|
|
|
// lshGuarded64 shifts an int64 left by s only when s < 64 and panics
// otherwise. Inside the guard the checks require a bare shift on riscv64
// (SLL) and arm64 (LSL) and forbid the range-handling instructions listed for
// riscv64, s390x, wasm and arm64.
func lshGuarded64(v int64, s uint) int64 {
|
|
if s < 64 {
|
|
// riscv64:"SLL",-"AND",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
// wasm:-"Select",-".*LtU"
|
|
// arm64:"LSL",-"CSEL"
|
|
return v << s
|
|
}
|
|
panic("shift too large")
|
|
}
|
|
|
|
// rshGuarded64U shifts a uint64 right by s only when s < 64 and panics
// otherwise. Inside the guard the checks require a bare shift on riscv64
// (SRL) and arm64 (LSR) and forbid the range-handling instructions listed for
// riscv64, s390x, wasm and arm64.
func rshGuarded64U(v uint64, s uint) uint64 {
|
|
if s < 64 {
|
|
// riscv64:"SRL\t",-"AND",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
// wasm:-"Select",-".*LtU"
|
|
// arm64:"LSR",-"CSEL"
|
|
return v >> s
|
|
}
|
|
panic("shift too large")
|
|
}
|
|
|
|
// rshGuarded64 shifts an int64 right (arithmetically) by s only when s < 64
// and panics otherwise. Inside the guard the checks require a bare shift on
// riscv64 (SRA) and arm64 (ASR) and forbid the range-handling instructions
// listed for riscv64, s390x, wasm and arm64.
func rshGuarded64(v int64, s uint) int64 {
|
|
if s < 64 {
|
|
// riscv64:"SRA\t",-"OR",-"SLTIU"
|
|
// s390x:-"RISBGZ",-"AND",-"LOCGR"
|
|
// wasm:-"Select",-".*LtU"
|
|
// arm64:"ASR",-"CSEL"
|
|
return v >> s
|
|
}
|
|
panic("shift too large")
|
|
}
|
|
|
|
// provedUnsignedShiftLeft shifts unsigned values of 64, 32, 16 and 8 bits
// left by a signed count, each inside a guard that bounds the count to
// [0, width). A result whose guard fails keeps its zero value. The arm64
// checks require LSL and forbid CSEL for every guarded shift.
func provedUnsignedShiftLeft(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
|
|
if shift >= 0 && shift < 64 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r1 = val64 << shift
|
|
}
|
|
if shift >= 0 && shift < 32 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r2 = val32 << shift
|
|
}
|
|
if shift >= 0 && shift < 16 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r3 = val16 << shift
|
|
}
|
|
if shift >= 0 && shift < 8 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r4 = val8 << shift
|
|
}
|
|
return r1, r2, r3, r4
|
|
}
|
|
|
|
// provedSignedShiftLeft shifts signed values of 64, 32, 16 and 8 bits left by
// a signed count, each inside a guard that bounds the count to [0, width).
// A result whose guard fails keeps its zero value. The arm64 checks require
// LSL and forbid CSEL for every guarded shift.
func provedSignedShiftLeft(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
|
|
if shift >= 0 && shift < 64 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r1 = val64 << shift
|
|
}
|
|
if shift >= 0 && shift < 32 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r2 = val32 << shift
|
|
}
|
|
if shift >= 0 && shift < 16 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r3 = val16 << shift
|
|
}
|
|
if shift >= 0 && shift < 8 {
|
|
// arm64:"LSL",-"CSEL"
|
|
r4 = val8 << shift
|
|
}
|
|
return r1, r2, r3, r4
|
|
}
|
|
|
|
// provedUnsignedShiftRight shifts unsigned values of 64, 32, 16 and 8 bits
// right by a signed count, each inside a guard that bounds the count to
// [0, width). A result whose guard fails keeps its zero value. The arm64
// checks require LSR and forbid CSEL for every guarded shift.
func provedUnsignedShiftRight(val64 uint64, val32 uint32, val16 uint16, val8 uint8, shift int) (r1 uint64, r2 uint32, r3 uint16, r4 uint8) {
|
|
if shift >= 0 && shift < 64 {
|
|
// arm64:"LSR",-"CSEL"
|
|
r1 = val64 >> shift
|
|
}
|
|
if shift >= 0 && shift < 32 {
|
|
// arm64:"LSR",-"CSEL"
|
|
r2 = val32 >> shift
|
|
}
|
|
if shift >= 0 && shift < 16 {
|
|
// arm64:"LSR",-"CSEL"
|
|
r3 = val16 >> shift
|
|
}
|
|
if shift >= 0 && shift < 8 {
|
|
// arm64:"LSR",-"CSEL"
|
|
r4 = val8 >> shift
|
|
}
|
|
return r1, r2, r3, r4
|
|
}
|
|
|
|
// provedSignedShiftRight shifts signed values of 64, 32, 16 and 8 bits right
// (arithmetically) by a signed count, each inside a guard that bounds the
// count to [0, width). A result whose guard fails keeps its zero value. The
// arm64 checks require ASR and forbid CSEL for every guarded shift.
func provedSignedShiftRight(val64 int64, val32 int32, val16 int16, val8 int8, shift int) (r1 int64, r2 int32, r3 int16, r4 int8) {
|
|
if shift >= 0 && shift < 64 {
|
|
// arm64:"ASR",-"CSEL"
|
|
r1 = val64 >> shift
|
|
}
|
|
if shift >= 0 && shift < 32 {
|
|
// arm64:"ASR",-"CSEL"
|
|
r2 = val32 >> shift
|
|
}
|
|
if shift >= 0 && shift < 16 {
|
|
// arm64:"ASR",-"CSEL"
|
|
r3 = val16 >> shift
|
|
}
|
|
if shift >= 0 && shift < 8 {
|
|
// arm64:"ASR",-"CSEL"
|
|
r4 = val8 >> shift
|
|
}
|
|
return r1, r2, r3, r4
|
|
}
|
|
|
|
// checkUnneededTrunc indexes tab with values that were truncated to 8 or 16
// bits and then combined with b or h, and also doubles truncated copies of d.
// It returns the accumulated uint32 sum f and the uint64 value g. The ppc64x
// checks forbid the extra mask or rotate instruction named on each directive
// and require the combined CLRLSLDI (or CLRLSLWI) form instead.
func checkUnneededTrunc(tab *[100000]uint32, d uint64, v uint32, h uint16, b byte) (uint32, uint64) {
|
|
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f := tab[byte(v)^b]
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f += tab[byte(v)&b]
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f += tab[byte(v)|b]
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f += tab[uint16(v)&h]
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f += tab[uint16(v)^h]
|
|
// ppc64x:-".*RLWINM",-".*RLDICR",".*CLRLSLDI"
|
|
f += tab[uint16(v)|h]
|
|
// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
|
|
f += tab[v&0xff]
|
|
// ppc64x:-".*AND",".*CLRLSLWI"
|
|
f += 2 * uint32(uint16(d))
|
|
// ppc64x:-".*AND",-"RLDICR",".*CLRLSLDI"
|
|
g := 2 * uint64(uint32(d))
|
|
return f, g
|
|
}
|
|
|
|
// checkCombinedShifts applies a constant mask (or a truncating conversion)
// followed by a constant left shift to values of several widths, and also
// multiplies a sign-extended int32 sum by 8. The ppc64x checks state, per
// statement, whether the mask and shift must combine into CLRLSLWI or
// CLRLSLDI, must not do so, or (power9) must use EXTSWSLI instead of SLD.
func checkCombinedShifts(v8 uint8, v16 uint16, v32 uint32, x32 int32, v64 uint64) (uint8, uint16, uint32, uint64, int64) {
|
|
|
|
// ppc64x:-"AND","CLRLSLWI"
|
|
f := (v8 & 0xF) << 2
|
|
// ppc64x:"CLRLSLWI"
|
|
f += byte(v16) << 3
|
|
// ppc64x:-"AND","CLRLSLWI"
|
|
g := (v16 & 0xFF) << 3
|
|
// ppc64x:-"AND","CLRLSLWI"
|
|
h := (v32 & 0xFFFFF) << 2
|
|
// ppc64x:"CLRLSLDI"
|
|
i := (v64 & 0xFFFFFFFF) << 5
|
|
// ppc64x:-"CLRLSLDI"
|
|
i += (v64 & 0xFFFFFFF) << 38
|
|
// ppc64x/power9:-"CLRLSLDI"
|
|
i += (v64 & 0xFFFF00) << 10
|
|
// ppc64x/power9:-"SLD","EXTSWSLI"
|
|
j := int64(x32+32) * 8
|
|
return f, g, h, i, j
|
|
}
|
|
|
|
// checkWidenAfterShift narrows right-shifted values to int32, int16 and int8
// and accumulates them in f, g and h, returning h and g widened to 64 bits.
// The parameter u is not used. Each ppc64x check states whether a MOVW, MOVH
// or MOVB must or must not appear for that narrowing; the pattern in the
// cases suggests the move is forbidden when the shift alone already leaves a
// value that fits the narrower type (NOTE(review): inferred, confirm).
func checkWidenAfterShift(v int64, u uint64) (int64, uint64) {
|
|
|
|
// ppc64x:-".*MOVW"
|
|
f := int32(v >> 32)
|
|
// ppc64x:".*MOVW"
|
|
f += int32(v >> 31)
|
|
// ppc64x:-".*MOVH"
|
|
g := int16(v >> 48)
|
|
// ppc64x:".*MOVH"
|
|
g += int16(v >> 30)
|
|
// ppc64x:-".*MOVH"
|
|
g += int16(f >> 16)
|
|
// ppc64x:-".*MOVB"
|
|
h := int8(v >> 56)
|
|
// ppc64x:".*MOVB"
|
|
h += int8(v >> 28)
|
|
// ppc64x:-".*MOVB"
|
|
h += int8(f >> 24)
|
|
// ppc64x:".*MOVB"
|
|
h += int8(f >> 16)
|
|
return int64(h), uint64(g)
|
|
}
|
|
|
|
// checkShiftAndMask32 rewrites successive elements of v in place, first with
// mask-then-shift and then with shift-then-mask combinations. The ppc64x
// checks require a single RLWNM with the stated rotate and mask bounds when
// bits survive, and a MOVW of R0 for the four combinations whose mask and
// shift leave no bits (the result is always zero).
func checkShiftAndMask32(v []uint32) {
|
|
i := 0
|
|
|
|
// ppc64x: "RLWNM\t[$]24, R[0-9]+, [$]12, [$]19, R[0-9]+"
|
|
v[i] = (v[i] & 0xFF00000) >> 8
|
|
i++
|
|
// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]22, [$]29, R[0-9]+"
|
|
v[i] = (v[i] & 0xFF00) >> 6
|
|
i++
|
|
// ppc64x: "MOVW\tR0"
|
|
v[i] = (v[i] & 0xFF) >> 8
|
|
i++
|
|
// ppc64x: "MOVW\tR0"
|
|
v[i] = (v[i] & 0xF000000) >> 28
|
|
i++
|
|
// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]24, [$]31, R[0-9]+"
|
|
v[i] = (v[i] >> 6) & 0xFF
|
|
i++
|
|
// ppc64x: "RLWNM\t[$]26, R[0-9]+, [$]12, [$]19, R[0-9]+"
|
|
v[i] = (v[i] >> 6) & 0xFF000
|
|
i++
|
|
// ppc64x: "MOVW\tR0"
|
|
v[i] = (v[i] >> 20) & 0xFF000
|
|
i++
|
|
// ppc64x: "MOVW\tR0"
|
|
v[i] = (v[i] >> 24) & 0xFF00
|
|
i++
|
|
}
|
|
|
|
// checkMergedShifts32 indexes a uint32 array and a uint64 array with values
// derived from a right-shifted uint32. The parameter u is not used. The
// ppc64x checks require the right shift, the truncation or mask, and the
// index scaling to merge into one RLWNM with the stated operands, and forbid
// a separate CLRLSLDI or SLD.
func checkMergedShifts32(a [256]uint32, b [256]uint64, u uint32, v uint32) {
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
|
|
a[0] = a[uint8(v>>24)]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]11, R[0-9]+, [$]21, [$]28, R[0-9]+"
|
|
b[0] = b[uint8(v>>24)]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
|
|
b[1] = b[(v>>20)&0xFF]
|
|
// ppc64x: -"SLD", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]28, R[0-9]+"
|
|
b[2] = b[v>>25]
|
|
}
|
|
|
|
// checkMergedShifts64 indexes uint32, uint64 and byte arrays with values
// derived from a right-shifted uint64. Most ppc64x checks require the shift,
// mask and index scaling to merge into one RLWNM; the a[1] case instead
// requires SRD plus CLRLSLDI with no RLWNM, and the c[1] case requires an
// ANDCC with the constant 8064.
func checkMergedShifts64(a [256]uint32, b [256]uint64, c [256]byte, v uint64) {
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]10, R[0-9]+, [$]22, [$]29, R[0-9]+"
|
|
a[0] = a[uint8(v>>24)]
|
|
// ppc64x: "SRD", "CLRLSLDI", -"RLWNM"
|
|
a[1] = a[uint8(v>>25)]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]9, R[0-9]+, [$]23, [$]29, R[0-9]+"
|
|
a[2] = a[v>>25&0x7F]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]3, R[0-9]+, [$]29, [$]29, R[0-9]+"
|
|
a[3] = a[(v>>31)&0x01]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]12, R[0-9]+, [$]21, [$]28, R[0-9]+"
|
|
b[0] = b[uint8(v>>23)]
|
|
// ppc64x: -"CLRLSLDI", "RLWNM\t[$]15, R[0-9]+, [$]21, [$]28, R[0-9]+"
|
|
b[1] = b[(v>>20)&0xFF]
|
|
// ppc64x: "RLWNM", -"SLD"
|
|
b[2] = b[((uint64((uint32(v) >> 21)) & 0x3f) << 4)]
|
|
// ppc64x: "RLWNM\t[$]11, R[0-9]+, [$]10, [$]15"
|
|
c[0] = c[((v>>5)&0x3F)<<16]
|
|
// ppc64x: "ANDCC\t[$]8064,"
|
|
c[1] = c[((v>>7)&0x3F)<<7]
|
|
}
|
|
|
|
// checkShiftMask stores shift-and-mask results derived from a and b into the
// first elements of z and y. The two leading reads of index 128 touch both
// slices before the stores (presumably so the later constant indexes need no
// bounds checks; confirm). The ppc64x checks forbid separate zero-extending
// moves (MOVBZ, MOVHZ, MOVWZ) and shifts where named, and require RLWNM for
// the z stores.
func checkShiftMask(a uint32, b uint64, z []uint32, y []uint64) {
|
|
_ = y[128]
|
|
_ = z[128]
|
|
// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
|
|
z[0] = uint32(uint8(a >> 5))
|
|
// ppc64x: -"MOVBZ", -"SRW", "RLWNM"
|
|
z[1] = uint32(uint8((a >> 4) & 0x7e))
|
|
// ppc64x: "RLWNM\t[$]25, R[0-9]+, [$]27, [$]29, R[0-9]+"
|
|
z[2] = uint32(uint8(a>>7)) & 0x1c
|
|
// ppc64x: -"MOVWZ"
|
|
y[0] = uint64((a >> 6) & 0x1c)
|
|
// ppc64x: -"MOVWZ"
|
|
y[1] = uint64(uint32(b)<<6) + 1
|
|
// ppc64x: -"MOVHZ", -"MOVWZ"
|
|
y[2] = uint64((uint16(a) >> 9) & 0x1F)
|
|
// ppc64x: -"MOVHZ", -"MOVWZ", -"ANDCC"
|
|
y[3] = uint64(((uint16(a) & 0xFF0) >> 9) & 0x1F)
|
|
}
|
|
|
|
// 128 bit shifts
|
|
|
|
// check128bitShifts combines x and y with a pair of complementary shifts:
// one operand is shifted by bits&63 and the other by (64-bits)&63 in the
// opposite direction, and the two halves are ORed together. It returns the
// right-shift form first and the left-shift form second. The amd64 checks
// require each form to become one three-operand SHRQ or SHLQ.
func check128bitShifts(x, y uint64, bits uint) (uint64, uint64) {
|
|
s := bits & 63
|
|
ŝ := (64 - bits) & 63
|
|
// check that the shift operation has two commas (three operands)
|
|
// amd64:"SHRQ.*,.*,"
|
|
shr := x>>s | y<<ŝ
|
|
// amd64:"SHLQ.*,.*,"
|
|
shl := x<<s | y>>ŝ
|
|
return shr, shl
|
|
}
|
|
|
|
// checkShiftToMask clears the low five bits of u[0] and s[0] with a
// right-then-left shift pair, and the high five bits of u[1] with a
// left-then-right pair. The amd64 checks require each pair to become one ANDQ
// and forbid the shift instructions.
func checkShiftToMask(u []uint64, s []int64) {
|
|
// amd64:-"SHR",-"SHL","ANDQ"
|
|
u[0] = u[0] >> 5 << 5
|
|
// amd64:-"SAR",-"SHL","ANDQ"
|
|
s[0] = s[0] >> 5 << 5
|
|
// amd64:-"SHR",-"SHL","ANDQ"
|
|
u[1] = u[1] << 5 >> 5
|
|
}
|
|
|
|
//
|
|
// Left shift with addition.
|
|
//
|
|
|
|
// checkLeftShiftWithAddition adds b shifted left by 1, 2 and 3 to a in turn
// and returns the total. The riscv64 checks require SLLI plus ADD under
// rva20u64, and the fused SH1ADD, SH2ADD and SH3ADD under rva22u64.
func checkLeftShiftWithAddition(a int64, b int64) int64 {
|
|
// riscv64/rva20u64: "SLLI","ADD"
|
|
// riscv64/rva22u64: "SH1ADD"
|
|
a = a + b<<1
|
|
// riscv64/rva20u64: "SLLI","ADD"
|
|
// riscv64/rva22u64: "SH2ADD"
|
|
a = a + b<<2
|
|
// riscv64/rva20u64: "SLLI","ADD"
|
|
// riscv64/rva22u64: "SH3ADD"
|
|
a = a + b<<3
|
|
return a
|
|
}
|