mirror of
https://github.com/golang/go.git
synced 2026-06-28 03:40:37 +00:00
Updated internal/runtime/maps/memhash_aes_simd.go to use the newly renamed Load functions. Conflicts: - src/cmd/compile/internal/amd64/simdssa.go - src/cmd/compile/internal/ssa/_gen/AMD64.rules - src/cmd/compile/internal/ssa/_gen/ARM64.rules - src/cmd/compile/internal/ssa/_gen/simdAMD64.rules - src/cmd/compile/internal/ssa/_gen/simdAMD64ops.go - src/cmd/compile/internal/ssa/rewriteAMD64.go - src/cmd/compile/internal/ssagen/intrinsics.go - src/cmd/compile/internal/types2/stdlib_test.go - src/go/types/stdlib_test.go - src/internal/buildcfg/exp.go - test/codegen/simd.go Merge List: + 2026-05-22bfbbe9667eruntime: remove unreachable code in malloc_generated.go + 2026-05-2299675026a7internal/strconv: fix mishandling of long outputs + 2026-05-22fc245b6427syscall: add export linknames for socketcall on S390X + 2026-05-22cc623858f6runtime/pprof: update test's expected frame count + 2026-05-21e4283592e5fmt: give advice on wrapper functions + 2026-05-211bcea1df64cmd/{vet,fix}: use new constants from /x/tools/go/analysis/suite + 2026-05-2160f0ced65binternal/testenv: make MustHaveSource detect missing source + 2026-05-21e0a8616941math/rand/v2: add method Rand.N + 2026-05-218621461b26cmd: update vendored x/arch + 2026-05-210db3804845archive/zip: turn off large zip test on 32-bit archs + 2026-05-21abdc5da461simd/archsimd/_gen: annotate text/template usage + 2026-05-21661e0c610einternal/strconv: work around escape analysis bug + 2026-05-2004ed01963eruntime/trace: remove unused runtime_readTrace declaration + 2026-05-20e2c188568dcmd/compile: compute embedded field offset in static initialization + 2026-05-20dd1da37fa4runtime: always call slowpath for heap bits in span + 2026-05-205a1c0ee6detest/fixedbugs: minor adjustments to line-directive specific tests + 2026-05-2078f63eb790crypto/internal/cryptotest/wycheproof: avoid reading go.sum at test time + 2026-05-20244c8ae4c8crypto/internal/fips140/nistec: avoid some mul64 in p256 calculations + 
2026-05-202c659bb4dbcrypto/internal/fips140/nistec: optimize P-256 scalar fiat implementation + 2026-05-20e4e6887ceecrypto/internal/fips140/nistec: mechanically improve P-256 scalar fiat code + 2026-05-20be35de22f1crypto/internal/fips140/nistec: replace P-256 scalar assembly with fiat + 2026-05-2091a81e5ae1go/types,cmd/compile/internal/types2: add String methods + 2026-05-20bbf60f3bbdall: update to x/tools@b38156a7 + 2026-05-20f571fc93b0encoding/json: clarify that v1 Unmarshal calls UnmarshalerFrom methods + 2026-05-20c700213f6cencoding/json/jsontext: expand Decoder.UnreadBuffer documentation + 2026-05-204a38094e42database/sql: add RowsColumnScanner, expose ConvertAssign + 2026-05-204dde0f6c36all: use linknamestd for new linknames + 2026-05-206a002d1474cmd/dist: pass -std to assembler + 2026-05-20694604e524runtime: further reduce number of size classes + 2026-05-203652f299a8cmd/link: skip TestAbstractOriginSanity + 2026-05-2005ab7b8da5cmd/compile/internal/syntax: refactor/reword new line directives tests + 2026-05-20c0bd270406net/netip: update godoc comments + 2026-05-20acced3df03crypto/internal/fips140/edwards25519/field: delete Square amd64 assembly + 2026-05-20a00bbab762crypto/internal/fips140/edwards25519/field: speed up add chains + 2026-05-201926d1d95dcmd/compile: clarify relativity of a simple file name in a line directive + 2026-05-203a9c8e1d90archive/zip: fix writer-side Zip64 edge cases + 2026-05-20a7ea4a7ecdcmd/compile/internal/syntax: resolve //line filenames relative to source directory + 2026-05-20b8246db0c3cmd/go/internal/clean: print all removals + 2026-05-2096db4cf31fcmd/go/internal/clean: forget about makefiles + 2026-05-208a69bfb1bbarchive/zip: fix reader-side Zip64 edge cases + 2026-05-208b672822b2internal/profile: return error from gzip.Writer.Close in Profile.Write + 2026-05-20c410b4944eruntime: remove duplicated code in no scan slow path + 2026-05-202f0459745ccmd/compile: make ReassignOracle StaticValue unwrap parens + 
2026-05-201bcfdf2df2cmd/compile: switch to ReassignOracle.StaticValue in escape call analysis + 2026-05-2084e0c4965acmd/compile: move FuncAssignments into ReassignOracle + 2026-05-205af294bac7cmd/compile: handle multiply-assigned func vars in escape analysis + 2026-05-203c05d2a519debug/pe: add FuzzReader + 2026-05-2071300e8011internal/strconv: use fast unrounded scaling for floating-point + 2026-05-20fd7a0e680dcmd: update golang.org/x/arch for riscv64 disassembler + 2026-05-20c3f7d75877internal/poll: omit embedded type field in splicePipe construction + 2026-05-204136ffed69simd/archsimd: decode non-broadcast memory operands + 2026-05-20856c405c4finternal/cpu: correcting spelling errors in the comments + 2026-05-20b9c5520dbcencoding/json: clarify that v1 Marshal calls MarshalerTo methods + 2026-05-20a40c232e81crypto/x509: skip TestReadUniqueDirectoryEntries if symlinks unsupported + 2026-05-207eeacc9ccenet/netip: remove incorrect comment in Prefix.AppendTo + 2026-05-204a6d3a3b46cmd/go/internal/envcmd: report GOPACKAGESDRIVER + 2026-05-206c8731962druntime: have patience for trailing thread in contention test + 2026-05-20b99b8feaaeruntime: split gp.m.locks bits for lock vs acquirem + 2026-05-1947f26133bdcmd/internal/obj/loong64: add ll.acq.{w,d}, sc.rel.{w,d}, sc.q instruction + 2026-05-1945f1313c18go/types: generate alias_test.go from respective types2 source + 2026-05-198494d25c4cos/signal: make NotifyContext Cause match context.Canceled + 2026-05-1937bce6617ftypes2, go/types: add missing alias test to types2, simplify go/types test + 2026-05-192760c3f5a3go/types: use mustParse helper to simplify tests where possible + 2026-05-19b12ed667d9crypto/ecdsa: test hash size restrictions + 2026-05-198329d31307runtime/loong64: use ABIInternal convention in cgocallbackg + 2026-05-19f93504bfd6cmd/internal/obj/loong64: add FRINT{F,D} instructions + 2026-05-1915f44ffcc3runtime: skip gcBlackenEnabled check and gcmarknewobject in fast path + 2026-05-192a93576965interrnal/buildcfg: 
enable SizeSpecializedMalloc by default + 2026-05-19b7ad0fe092all: use SkipObjectResolution mode in parser.ParseFile calls where possible + 2026-05-198ddf0031cfcmd/compile: disallow nointerface method satisfying type constraint + 2026-05-19063f8b07c1crypto/tls: fix broken quic_test.go + 2026-05-1924e654197aruntime: add benchmarks for allocating slices of pointers + 2026-05-191dd2bef375runtime/secret: fix cgo crashes inside of secret.Do + 2026-05-19c8b14e157fmath/big: only use pool for large allocations + 2026-05-19aee6009ba5cmd/link: check linkname access to assembly symbols + 2026-05-19ad46b4815ecrypto/tls: clamp effective minimum version to TLS 1.3 when using QUIC + 2026-05-19edf006c9a3net/mail: escape arbitrary input when including them in errors + 2026-05-190db7bea636cmd/dist: fix JSON processing of trailing bytes + 2026-05-195563d58a15go/printer: update comments and simplify test (cleanup) + 2026-05-19c07a0f09b8doc: document new ppc64/linux features + 2026-05-194b77d329eaencoding/json/v2: add string option hint optimization + 2026-05-19469636308bencoding/json/jsontext: drop duplicate import + 2026-05-1983b29183afcrypto/internal/fips140/rsa: add large exponent OAEP for ACVP + 2026-05-197f4f2c1c7bcrypto/ecdsa: check the hash length in PrivateKey.Sign + 2026-05-192f9a9642e1crypto/ecdsa: reject empty hashes + 2026-05-191debc9f0cecrypto/tls: surface private key parsing error from X509KeyPair + 2026-05-1918f72b3842crypto/tls: add a test for running with broken certificates + 2026-05-192f57f7626ecrypto/tls: remove the x509keypairleaf GODEBUG setting + 2026-05-191634ae8c7ccrypto/tls: remove the tls10server GODEBUG setting + 2026-05-190f4862de57crypto/tls: remove tls3des GODEBUG setting + 2026-05-1914a4bc2051crypto/tls: remove tlsrsakex GODEBUG setting + 2026-05-19a7bc19bf37crypto/tls: remove the tlsunsafeekm GODEBUG setting + 2026-05-195cc4ceb800crypto/tls: add TestInvalidHandshakeSignature + 2026-05-1978b71d40fdencoding/json/jsontext: skip allocation test when 
inlining is disabled + 2026-05-196b0243ccf6crypto/tls: implement MLKEM1024 key exchange + 2026-05-1997a57b481fcrypto/tls: use mlkem.GenerateKey for ML-KEM hybrids + 2026-05-1927532dc35ccrypto/tls: deprecate Config.Rand + 2026-05-19542d7d549fcrypto/tls: let Config.CurvePreferences override GODEBUG options + 2026-05-19c9a3e8bbd2encoding/json/jsontext: skip inline-dependent test on noopt builders + 2026-05-19e01f29f918crypto/internal/fips140/rsa: check hash length in PKCS#1 v1.5 signatures + 2026-05-1947cc60743bruntime,runtime/cgo: port ios/arm64 working dir setup from C to Go + 2026-05-1995e935b1b3crypto/tls: update generated certificates + 2026-05-19c74ba7d265crypto/tls: add ML-DSA support + 2026-05-19003833a138cmd/link: track content-hashed-ness for cloned symbols + 2026-05-1999623c5a17crypto/rsa: skip TestKeyGenerationVectors on older FIPS 140-3 modules + 2026-05-19f142be8f2fgo/printer: do not indent composite literals in return statements + 2026-05-194e51025e3ecrypto/x509: add ML-DSA support + 2026-05-19d80de8f117cmd/go: sort subcommands in help output + 2026-05-194bf23b51b8crypto/x509: honor SSL_CERT_{FILE,DIR} on windows/darwin + 2026-05-1993da30397dmath/big: move Int.Divide and corresponding test function up (cleanup) + 2026-05-195f47eb0cdfmath/big: refactored TestIntDivide tests, added more test cases + 2026-05-1905f75fb9e8internal/runtime/maps,runtime/: pass keys by value to MemHash{32,64} and StrHash. 
+ 2026-05-19e26a373785runtime/secret: implement goroutine inheriting secret state + 2026-05-19e73e73470ecmd/compile: improve known bits debug print + 2026-05-197d2eb15103net/http/fcgi: handle error returned by w.Close() in writePairs + 2026-05-19880ef11ecfcmd/compile: make computeKnownBitsForShift iteration faster + 2026-05-19fabaedcbe8cmd/compile: fold == != with a const and a bijective operation into the const + 2026-05-1975560e67c9runtime: introduce a mallocgc fast path + 2026-05-196716b79b58lib/time: update to 2026b/2026b + 2026-05-192378242315runtime/_mkmalloc: allow for folding const bool exprs + 2026-05-19e9edbced42encoding/json/v2: remove recursion and error on `string` on unsupported type + 2026-05-19e8c1e370c9database/sql: add cursor cancelation test, document some cursor issues + 2026-05-1964315a2d18bytes, strings: use builtin min function in genSplit + 2026-05-1903d1f8efc8crypto/rsa: bypass Go+BoringCrypto for small, insecure, flaky keys + 2026-05-19c2ecd421b8crypto/mlkem: add Wycheproof coverage + 2026-05-197df2a42f94crypto: move Wycheproof test coverage from x/crypto + 2026-05-19caa4c72feecrypto/x509: accept non-string pkix.Name attributes + 2026-05-19d2095798a1crypto/internal/cryptotest: add Wycheproof schema/helpers + 2026-05-193e1c31701ccrypto/ecdsa: add c2sp.org/det-keygen test vectors for ECDSA key generation + 2026-05-190db36238c6cmd/internal/obj/x86: shorten MOVQ r64, imm32 for positive immediates + 2026-05-19c888fd67f0crypto/mldsa: don't precompute PublicKey + 2026-05-197bc111c6ebcrypto/mldsa: new package + 2026-05-194212586726cmd/compile/internal/ssa: prefer registers x8-x15/f8-f15 on riscv64 + 2026-05-190c3b9f837dcmd/compile: fix corner case boundedness for oversized shifts + 2026-05-188f7f951965math/big: add Int.Divide and RoundingMode aliases + 2026-05-182677fe9bbego/constant: add StringLen function + 2026-05-1815fd4ff942runtime: move post allocation work into postMallocgc + 2026-05-18c7a107bfbfencoding/json/internal/jsontest: rename 
testdata to _embed + 2026-05-18722ee60825runtime: combine sizespecializedmalloc small stubs into a single stub + 2026-05-180a151acad8runtime: remove race and valgrind cases from specializedmalloc stubs + 2026-05-18b23aea0c94runtime/_mkmalloc: set position in substituteWithBasicLit + 2026-05-18e212a16d1einternal/buildcfg: flip default of GenericMethods + 2026-05-1821e3cdefc3cmd/go: simplify go.mod to have at most two require sections + 2026-05-18f3f3d0859acmd/compile/internal/noder: update UIR to V4 + 2026-05-18813b317cc9net/http/httptest: add NewTestServer with in-memory network + 2026-05-18a871fd3732internal/nettest: add internal fake networking implementation + 2026-05-182e67b18935net/http: fix data race in TestServerNoWriteTimeout/h2 + 2026-05-1871c7ea1c6ccrypto/internal/fips140/aes/gcm: constant-time GHASH + 2026-05-18c1f0b9bdbago/printer: fix false positive doc comment + 2026-05-1844fde0fd08crypto/internal/fips140/rsa: add large exponent support for ACVP tests + 2026-05-183cdb042b2ecrypto/rsa: add c2sp.org/det-keygen test vectors for RSA key generation + 2026-05-185cd903156ecrypto/rsa: generate primes ≡ 7 mod 8 and update comments + 2026-05-182361851aa9crypto: improve panic message when a hash function is unavailable + 2026-05-186de59e2070crypto: return an error if a hash function is not available + 2026-05-183825609217crypto/tls: remove a couple FIPS 140-3 mode skip from tests + 2026-05-18aca2bff284crypto/tls: consistently use testenv.SetGODEBUG in tests + 2026-05-189578a80f15crypto/tls: remove old test config and certificates + 2026-05-18907b4be52bcrypto/tls: port TestClientAuth to the new certificates + 2026-05-18c78a8273c8crypto/tls: migrate off legacy testConfig + 2026-05-18ca4f272170crypto/tls: switch tests to new test certificates and keys + 2026-05-18320e0be23druntime/pprof: possibly deflake TestGoroutineLeakProfileConcurrency stress tests + 2026-05-186997bcd820crypto/x509: add RawSignatureAlgorithm + 2026-05-18e62d3e6e89internal/buildcfg: enable 
JSONv2 as baseline + 2026-05-18250d0eb6eemath/big: reduce x1,x2 via subtraction + 2026-05-1769a99fdcbbnet/netip: inline single-use Addr.string{4,6,4In6} methods + 2026-05-169df04115d6log/slog: document context.Background use in non-Context methods + 2026-05-164e06ed21acruntime: throw if a timespec64 can't be converted to a timespec32 + 2026-05-160d54be530bcmd/compile: cleanup ARM64 shift lowering + 2026-05-159e0467b174cmd/compile: remove flags → bool → flags roundtrips on amd64 + 2026-05-15c6eaf03788database/sql: run tests with different driver variants + 2026-05-159be7615aa2cmd/compile: represent escape analysis callees as a slice + 2026-05-15f4bfb1a9c6cmd/compile: treat singly-assigned func vars as static in escape analysis + 2026-05-151a7e601d07net/textproto: escape arbitrary input when including them in errors + 2026-05-15ab7c8279a0cmd/compile, runtime: use fine-grained FENCE instructions on riscv64 + 2026-05-15212065c922cmd/compile: shuffle bits.Sub intrinsic generation on amd64 + 2026-05-158bd95ae848src: fix spelling mistakes + 2026-05-14080a6d5fa8net/http: disable HTTP/3 tests prior to freeze + 2026-05-1480123ef4bfcmd/compile: preserve pointerness during splitload + 2026-05-14c203e4ecb9image, image/gif: document DecodeConfig before Decode for untrusted input + 2026-05-137601c4bf42net/http/internal/http2: reject STREAM_ENDED + Content-Length request + 2026-05-13c22f92a751net/http: fix hang in TestTransportClosesBodyOnError/h3 + 2026-05-1381f747893dnet/netip: fix typo in AddrPort.AppendBinary godoc + 2026-05-13168fe84e6cgo/ast: fix godoc links + 2026-05-13364de84f36all: turn on cgo/external linking for linux/ppc64 + 2026-05-13922abf576dcmd/go: add constant for requires simplification + 2026-05-1258efaf3859cmd/asm, cmd/internal/obj: add zvbb/zvbc for riscv64 + 2026-05-12aa3c8ed492net/http: move TestOmitHTTP2 to cmd/dist + 2026-05-1242bdffec2dcmd/go: force external linking when CGO_LDFLAGS contains static-linking flags + 
2026-05-12cd913caa3fcmd/go/internal/telemetrystats: add go/platform/target/port:*-* counter + 2026-05-12a5a336cda2debug/pe, debug/macho: use saferio.ReadData for ZLIB section decompression + 2026-05-12f552547748cmd/compile: fixed error message about println says print + 2026-05-125b106947d1cmd/compile: propagate desired registers through phi nodes + 2026-05-1255089b9e27runtime: remove specialized classes larger than 128 bytes + 2026-05-1215129eb73bruntime: add microbenchmarks for sizespecializedmalloc + 2026-05-119936a78b78runtime: consolidate tiny sizespecializedmalloc functions + 2026-05-11326e7845a2all: update to x/net@ad8140e0aa + 2026-05-11358cf41413cmd/internal/obj/arm: use single BIC for AND with negative-rotated immediate + 2026-05-1111a3b27b91crypto/hkdf: fix example to derive three different 128-bit keys + 2026-05-112568174249runtime: fix TestUsingVDSO on linux/ppc64le + 2026-05-102403e594a5internal/runtime/maps: rewrite MemHash{32,64} using simd/archsimd intrinsics + 2026-05-08ce4fc9417ccmd/compile: use inline tree index to identify call stack + 2026-05-0855ff407d4fcmd/internal/obj: print error on duplicate symbol definitions + 2026-05-08e49b53439dcmd/compile/internal/obj/arm64: add RPRFM instruction for range prefetch + 2026-05-0874c35fca7acmd/compile: canonicalize x+x into x<<1 in generic.rules + 2026-05-08f133609b75runtime: eliminate false positives in ctrlGroupMatchH2 on ARM64 + 2026-05-08816c1a79fbgo/importer: un-deprecate importer.ForCompiler + 2026-05-083f3387fab8cmd/compile: catch missed case in binary-search-for-switch + 2026-05-08c7d87cda53runtime/cgo: add acquire/release back around malloc + 2026-05-08373b3a9097test: update newinline.go for closure name change + 2026-05-08afcf04cb64internal/goexperiment: actually delete goroutineleakprofile experiment + 2026-05-07e30b75a910cmd/cgo/internal/testsanitizers: bound ASAN C support probe + 2026-05-07834214f787runtime: fix TestUsingVDSO on Linux ARMv6 (Pi 1) + 
2026-05-07ea0da4047cnet/http/internal/http2: close client conn on GOAWAY with no reqs in-flight + 2026-05-078042aaf03cruntime/maps: only grow small full maps when inserting new keys + 2026-05-07409f784beacmd/compile: simplify closure name + 2026-05-071456da550acrypto/tls: add QUICConfig.ClientHelloInfoConn + 2026-05-07fee42ee058src: spelling and grammar fixes + 2026-05-078908cc14cccmd/internal: fix error message + 2026-05-07f2b1b38293cmd/compile: crash if we try to generate a truncated AMD64 const shift + 2026-05-07c3bfc824a5cmd/compile: do not misscompile x+x << 63 to x << 0 on amd64 + 2026-05-07784ea961a4runtime: avoid concurrent use of synctest timer race context. + 2026-05-0715b9fc2659net/http: support non-tls.Conn TLS connections + 2026-05-07887f38afa9net/http: fix FileServer tests that are racy for HTTP/3 + 2026-05-0770634e7d67net/http: adjust several tests to work for HTTP/3 + 2026-05-074d7ac7ff23all: update to x/net@689f70a42a + 2026-05-071a9af07120cmd/go: reject sumdb response lacking module hash + 2026-05-07788b1c54c1all: avoid unsafe StringToUTF16Ptr on Windows + 2026-05-07f9f6dc7c82archive/tar: clarify that tarinsecurepath=0 does not apply to linknames + 2026-05-072747d887ebcompress/flate: clarify compatibility promise + 2026-05-07714a94dd31cmd/compile/internal/noder: put type args inside parenthesis + 2026-05-0616449179ecinternal/goexperiment,runtime: drop goroutineleakprofile experiment + 2026-05-063b7d571c99html/template: use zero-alloc bytes.EqualFold + 2026-05-0666843181d1cmd/go: fix potention deadlock + 2026-05-06b32283b27bcmd/go: fix length is not equal cause bytes.Equal never return true + 2026-05-06caeb5b7b66cmd/api: fix false positive and false negative in isDeprecated + 2026-05-06deee1b75cfcmd/api/testdata: add test case for issue 79145 + 2026-05-06f03f2ab67ago/types: prevent panic with multi-tag, multi-file test packages + 2026-05-06f230dd8a1dmime: avoid quadratic complexity in WordDecoder.DecodeHeader + 2026-05-06eb845eca72go/types, types2: 
include type arguments in instantiated type cycle errors + 2026-05-063cf84263ecruntime: prune tombstones before rehash in fast32 pointer-key insert + 2026-05-06edc5480072cmd/go: correct go/vcs counter names + 2026-05-06978f00ab7fcrypto/x509/pkix: render string-typed attribute values as strings + 2026-05-06253aa2a12ainternal/buildcfg: enable goroutineleakprofile GOEXPERIMENT by default + 2026-05-060b87c1d350regexp: reimplement API using iterators, revise doc comments + 2026-05-06d5ebe8100dcmd/compile: use binsearch-not-table for simd non-constant immediates when retpoline + 2026-05-0507840ceeedindex/suffixarray: fix incorrect condition + 2026-05-05628674a0c1cmd/compile: schedule increments after flags + 2026-05-05d81ba6c35druntime: exclude main goroutine blocked on select{} from goroutine leak profile + 2026-05-0519f8047c26all: update to x/net@5e11a5ab89 + 2026-05-050b54a75319encoding/json/v2: support `format` tag option behind goexperiment + 2026-05-05d5d2bde748encoding/json/jsontext: document underlying data storage of Token + 2026-05-05f2a43196d1encoding/json/jsontext: use custom wrapper type for Token accessor errors + 2026-05-051bd98fab2ccrypto/internal/fips140/drbg: fix Wasm stub + 2026-05-056f19c3b459cmd/compile: add missing bound checks when handle zero-sized values + 2026-05-04e929fb78e4index/suffixarray: protect against another data corruption + 2026-05-042098279730index/suffixarray: report error rather than panic for corrupted data + 2026-05-044e4b780652cmd/compile/internal/noder: hoist up generic methods assertion Change-Id: Iecbe9b5fbcd86b4094a839b03aa8f7e0c28275de
147 lines
3.6 KiB
Go
147 lines
3.6 KiB
Go
// asmcheck
|
|
|
|
// Copyright 2025 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// These tests check code generation of simd peephole optimizations.
|
|
|
|
//go:build goexperiment.simd && amd64
|
|
|
|
package codegen
|
|
|
|
import (
|
|
"math"
|
|
"simd/archsimd"
|
|
)
|
|
|
|
// vptest1 loads two Uint64x2 vectors from slice literals and reports whether
// v1.AndNot(v2) is zero.
//
// The two asmcheck directives ahead of the return statement expect the amd64
// output to contain a VPTEST instruction and a SETCS instruction.
// NOTE(review): presumably the AndNot is folded into the VPTEST and the
// result is read from the carry flag; confirm against the compiler rules.
func vptest1() bool {
|
|
v1 := archsimd.LoadUint64x2([]uint64{0, 1})
|
|
v2 := archsimd.LoadUint64x2([]uint64{0, 0})
|
|
// amd64:`VPTEST (.*)(.*)$`
|
|
// amd64:`SETCS (.*)$`
|
|
return v1.AndNot(v2).IsZero()
|
|
}
|
|
|
|
// vptest2 loads two Uint64x2 vectors from slice literals and reports whether
// v1.And(v2) is zero.
//
// The two asmcheck directives ahead of the return statement expect the amd64
// output to contain a VPTEST instruction and a SETEQ instruction.
// NOTE(review): presumably the And is folded into the VPTEST and the result
// is read from the zero flag; confirm against the compiler rules.
func vptest2() bool {
|
|
v1 := archsimd.LoadUint64x2([]uint64{0, 1})
|
|
v2 := archsimd.LoadUint64x2([]uint64{0, 0})
|
|
// amd64:`VPTEST (.*)(.*)$`
|
|
// amd64:`SETEQ (.*)$`
|
|
return v1.And(v2).IsZero()
|
|
}
|
|
|
|
// Args2 is the aggregate argument type used by the NoSpill tests in this
// file: two archsimd.Uint8x32 vector fields followed by a string field.
type Args2 struct {
|
|
V0 archsimd.Uint8x32
|
|
V1 archsimd.Uint8x32
|
|
// x is a non-vector field; the wrapper tests assign to it before
// forwarding the whole value to the callee.
x string
|
|
}
|
|
|
|
// simdStructNoSpill returns a.V0.Xor(a.V1) for a struct passed by value.
// It is marked noinline, so callers emit a real call to it.
//
// The directive ahead of the return statement is a negative check (leading
// minus): no VMOVDQU instruction may appear for that line.
// NOTE(review): going by the function name, the intent is that the vector
// fields are not spilled to or reloaded from memory; confirm.
//go:noinline
|
|
func simdStructNoSpill(a Args2) archsimd.Uint8x32 {
|
|
// amd64:-`VMOVDQU .*$`
|
|
return a.V0.Xor(a.V1)
|
|
}
|
|
|
|
// simdStructWrapperNoSpill overwrites the string field of its by-value
// argument and forwards the struct to simdStructNoSpill.
//
// The negative directive sits directly ahead of the assignment to a.x and
// requires that no VMOVDQU instruction appears there.
// NOTE(review): presumably this guards against the vector fields being
// copied through memory when only the string field changes; confirm.
func simdStructWrapperNoSpill(a Args2) archsimd.Uint8x32 {
|
|
// amd64:-`VMOVDQU .*$`
|
|
a.x = "test"
|
|
return simdStructNoSpill(a)
|
|
}
|
|
|
|
// simdArrayNoSpill is the array counterpart of simdStructNoSpill: it takes a
// one-element array of Args2 by value and returns a[0].V0.Xor(a[0].V1).
// It is marked noinline, so callers emit a real call to it.
//
// The directive ahead of the return statement is a negative check: no
// VMOVDQU instruction may appear for that line.
//go:noinline
|
|
func simdArrayNoSpill(a [1]Args2) archsimd.Uint8x32 {
|
|
// amd64:-`VMOVDQU .*$`
|
|
return a[0].V0.Xor(a[0].V1)
|
|
}
|
|
|
|
// simdArrayWrapperNoSpill overwrites the string field of the single array
// element and forwards the array to simdArrayNoSpill.
//
// The negative directive sits directly ahead of the assignment to a[0].x and
// requires that no VMOVDQU instruction appears there.
func simdArrayWrapperNoSpill(a [1]Args2) archsimd.Uint8x32 {
|
|
// amd64:-`VMOVDQU .*$`
|
|
a[0].x = "test"
|
|
return simdArrayNoSpill(a)
|
|
}
|
|
|
|
// simdFeatureGuardedMaskOpt evaluates x.Add(y).Masked(mask) on zero-valued
// Int16x16 vectors twice: once inside a branch guarded by
// archsimd.X86.AVX512() and once on the fallthrough path. Both paths build
// the mask with Mask16x16FromBits(5).
//
// The trailing directives expect different instructions for the two paths:
// a VPADDW.Z form inside the guard and a VPAND outside it.
// NOTE(review): presumably the guarded path may use a zeroing-masked add
// while the unguarded path must apply the mask with a separate AND; confirm.
func simdFeatureGuardedMaskOpt() archsimd.Int16x16 {
|
|
var x, y archsimd.Int16x16
|
|
if archsimd.X86.AVX512() {
|
|
mask := archsimd.Mask16x16FromBits(5)
|
|
return x.Add(y).Masked(mask) // amd64:`VPADDW.Z .*$`
|
|
}
|
|
mask := archsimd.Mask16x16FromBits(5)
|
|
return x.Add(y).Masked(mask) // amd64:`VPAND .*$`
|
|
}
|
|
|
|
// simdMaskedMerge evaluates x.Add(y).Merge(x, mask) on zero-valued Int16x16
// vectors twice: once inside a branch guarded by archsimd.X86.AVX512() and
// once on the fallthrough path. Both paths build the mask with
// Mask16x16FromBits(5).
//
// The trailing directives are opposites: inside the guard VPBLENDVB must not
// appear (negative check), outside the guard it must appear.
// NOTE(review): presumably the guarded path folds the merge into a masked
// add instead of blending; confirm against the compiler rules.
func simdMaskedMerge() archsimd.Int16x16 {
|
|
var x, y archsimd.Int16x16
|
|
if archsimd.X86.AVX512() {
|
|
mask := archsimd.Mask16x16FromBits(5)
|
|
return x.Add(y).Merge(x, mask) // amd64:-`VPBLENDVB .*$`
|
|
}
|
|
mask := archsimd.Mask16x16FromBits(5)
|
|
return x.Add(y).Merge(x, mask) // amd64:`VPBLENDVB .*$`
|
|
}
|
|
|
|
// nan is a NaN value used to seed floats64s.
var nan = math.NaN()
|
|
// floats64s holds 16 float64 values with NaN at indices 3, 5 and 12; the
// IsNaN tests load their input vectors from it.
var floats64s = []float64{0, 1, 2, nan, 4, nan, 6, 7, 8, 9, 10, 11, nan, 13, 14, 15}
|
|
// sinkInt64s is a 100-element slice that the IsNaN tests store their results
// into.
var sinkInt64s = make([]int64, 100)
|
|
|
|
// simdIsNaN loads two Float64x4 vectors from floats64s (offsets 0 and 4),
// computes IsNaN on each, ORs the two masks and stores the result, converted
// with ToInt64x4, into sinkInt64s.
//
// The directive ahead of the Or expects a VCMPPD with immediate 3 and
// forbids a VPOR.
// NOTE(review): presumably the two IsNaN tests and the Or are combined into
// a single compare of x against y; confirm against the compiler rules.
func simdIsNaN() {
|
|
x := archsimd.LoadFloat64x4(floats64s)
|
|
y := archsimd.LoadFloat64x4(floats64s[4:])
|
|
a := x.IsNaN()
|
|
b := y.IsNaN()
|
|
// amd64:"VCMPPD [$]3," -"VPOR"
|
|
c := a.Or(b)
|
|
c.ToInt64x4().Store(sinkInt64s)
|
|
}
|
|
|
|
// simdIsNaN512 is the Float64x8 variant of simdIsNaN: it loads two vectors
// from floats64s (offsets 0 and 8), computes IsNaN on each, ORs the two
// masks and stores the result, converted with ToInt64x8, into sinkInt64s.
//
// The directive ahead of the Or expects a VCMPPD with immediate 3 and
// forbids a VPOR.
func simdIsNaN512() {
|
|
x := archsimd.LoadFloat64x8(floats64s)
|
|
y := archsimd.LoadFloat64x8(floats64s[8:])
|
|
a := x.IsNaN()
|
|
b := y.IsNaN()
|
|
// amd64:"VCMPPD [$]3," -"VPOR"
|
|
c := a.Or(b)
|
|
c.ToInt64x8().Store(sinkInt64s)
|
|
}
|
|
|
|
// sftImmVPSRL shifts a zero-valued Uint32x4 right by the constant 1 with
// ShiftAllRight.
//
// The directive ahead of the return statement expects a VPSRLD whose first
// operand is the immediate 1, i.e. the constant shift count is encoded in
// the instruction.
func sftImmVPSRL() archsimd.Uint32x4 {
|
|
var x archsimd.Uint32x4
|
|
// amd64:`VPSRLD \$1, .*$`
|
|
return x.ShiftAllRight(1)
|
|
}
|
|
|
|
// aLtbLtc8_avx512 returns the mask for a < b AND b < c over Int8x64 vectors,
// built as a.Less(b).And(b.Less(c)).
//
// The directive ahead of the return statement expects a KANDB instruction.
// NOTE(review): across this file the expected mnemonic suffix follows the
// element width (8 gives B, 64 gives Q) rather than the number of lanes in
// the mask; confirm this is the instruction the compiler is meant to emit.
func aLtbLtc8_avx512(a, b, c archsimd.Int8x64) archsimd.Mask8x64 {
|
|
// The vector length implies AVX512, which in turn implies that the mask
// operations are available.
|
|
// amd64:`KANDB`
|
|
return a.Less(b).And(b.Less(c))
|
|
}
|
|
|
|
// aLtbORbLtc8_avx512 returns the mask for a < b OR b < c over Int8x64
// vectors, built as a.Less(b).Or(b.Less(c)).
//
// The directive ahead of the return statement expects a KORB instruction.
// NOTE(review): across this file the expected mnemonic suffix follows the
// element width (8 gives B, 64 gives Q) rather than the number of lanes in
// the mask; confirm this is the instruction the compiler is meant to emit.
func aLtbORbLtc8_avx512(a, b, c archsimd.Int8x64) archsimd.Mask8x64 {
|
|
// The vector length implies AVX512, which in turn implies that the mask
// operations are available.
|
|
// amd64:`KORB`
|
|
return a.Less(b).Or(b.Less(c))
|
|
}
|
|
|
|
// aLtbLtc64_avx512 returns the mask for a < b AND b < c over Int64x8
// vectors, built as a.Less(b).And(b.Less(c)).
//
// The directive ahead of the return statement expects a KANDQ instruction.
func aLtbLtc64_avx512(a, b, c archsimd.Int64x8) archsimd.Mask64x8 {
|
|
// The vector length implies AVX512, which in turn implies that the mask
// operations are available.
|
|
// amd64:`KANDQ`
|
|
return a.Less(b).And(b.Less(c))
|
|
}
|
|
|
|
// aLtbORbLtc64_avx512 returns the mask for a < b OR b < c over Int64x8
// vectors, built as a.Less(b).Or(b.Less(c)).
//
// The directive ahead of the return statement expects a KORQ instruction.
func aLtbORbLtc64_avx512(a, b, c archsimd.Int64x8) archsimd.Mask64x8 {
|
|
// The vector length implies AVX512, which in turn implies that the mask
// operations are available.
|
|
// amd64:`KORQ`
|
|
return a.Less(b).Or(b.Less(c))
|
|
}
|
|
|
|
// globalSlice holds eight uint32 values; simdMemoryOperandMerge loads its
// two four-element halves as vector operands.
var globalSlice = []uint32{1, 2, 3, 4, 5, 6, 7, 8}
|
|
|
|
// simdMemoryOperandMerge broadcasts 1 into a Uint32x4 and adds to it, in
// turn, the vectors loaded from globalSlice[0:4] and globalSlice[4:8].
//
// Each Add is preceded by a directive expecting a VPADDD whose first operand
// is a memory reference: a bare register-indirect operand for the first half
// and the same form with displacement 16 for the second half.
// NOTE(review): presumably this verifies that each load is folded into the
// add as a memory operand rather than issued separately; confirm.
func simdMemoryOperandMerge() archsimd.Uint32x4 {
|
|
a := archsimd.BroadcastUint32x4(1)
|
|
// amd64:`VPADDD \([A-Z]+\), X\d, X\d`
|
|
a = a.Add(archsimd.LoadUint32x4(globalSlice[0:4]))
|
|
// amd64:`VPADDD 16\([A-Z]+\), X\d, X\d`
|
|
a = a.Add(archsimd.LoadUint32x4(globalSlice[4:8]))
|
|
return a
|
|
}
|