diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 3ae3c617646..f511e75e972 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -18,6 +18,7 @@ import (
 	"cmd/internal/obj"
 	"cmd/internal/obj/x86"
 	"internal/abi"
+	"internal/buildcfg"
 )
 
 // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
@@ -1290,7 +1291,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
 		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
 			// zeroing X15 when entering ABIInternal from ABI0
-			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
+			zeroX15(s)
 			// set G register from TLS
 			getgFromTLS(s, x86.REG_R14)
 		}
@@ -1301,7 +1302,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		s.Call(v)
 		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
 			// zeroing X15 when entering ABIInternal from ABI0
-			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
+			zeroX15(s)
 			// set G register from TLS
 			getgFromTLS(s, x86.REG_R14)
 		}
@@ -1829,6 +1830,34 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 	}
 }
 
+// zeroX15 zeroes the X15 register.
+func zeroX15(s *ssagen.State) {
+	vxorps := func(s *ssagen.State) {
+		p := s.Prog(x86.AVXORPS)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = x86.REG_X15
+		p.AddRestSourceReg(x86.REG_X15)
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = x86.REG_X15
+	}
+	if buildcfg.GOAMD64 >= 3 {
+		vxorps(s)
+		return
+	}
+	// AVX may not be available, check before zeroing the high bits.
+	p := s.Prog(x86.ACMPB)
+	p.From.Type = obj.TYPE_MEM
+	p.From.Name = obj.NAME_EXTERN
+	p.From.Sym = ir.Syms.X86HasAVX
+	p.To.Type = obj.TYPE_CONST
+	p.To.Offset = 1
+	jmp := s.Prog(x86.AJNE)
+	jmp.To.Type = obj.TYPE_BRANCH
+	vxorps(s)
+	sse := opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
+	jmp.To.SetTarget(sse)
+}
+
 // Example instruction: VRSQRTPS X1, X1
 func simdV11(s *ssagen.State, v *ssa.Value) *obj.Prog {
 	p := s.Prog(v.Op.Asm())
diff --git a/src/cmd/compile/internal/ir/symtab.go b/src/cmd/compile/internal/ir/symtab.go
index ee0f52fbf3f..2222a5444aa 100644
--- a/src/cmd/compile/internal/ir/symtab.go
+++ b/src/cmd/compile/internal/ir/symtab.go
@@ -68,6 +68,7 @@ type symsStruct struct {
 	Loong64HasLAM_BH *obj.LSym
 	Loong64HasLSX    *obj.LSym
 	RISCV64HasZbb    *obj.LSym
+	X86HasAVX        *obj.LSym
 	X86HasFMA        *obj.LSym
 	X86HasPOPCNT     *obj.LSym
 	X86HasSSE41      *obj.LSym
diff --git a/src/cmd/compile/internal/ssagen/ssa.go b/src/cmd/compile/internal/ssagen/ssa.go
index abb6370a15f..57129817f6c 100644
--- a/src/cmd/compile/internal/ssagen/ssa.go
+++ b/src/cmd/compile/internal/ssagen/ssa.go
@@ -150,9 +150,10 @@ func InitConfig() {
 	ir.Syms.TypeAssert = typecheck.LookupRuntimeFunc("typeAssert")
 	ir.Syms.WBZero = typecheck.LookupRuntimeFunc("wbZero")
 	ir.Syms.WBMove = typecheck.LookupRuntimeFunc("wbMove")
+	ir.Syms.X86HasAVX = typecheck.LookupRuntimeVar("x86HasAVX")               // bool
+	ir.Syms.X86HasFMA = typecheck.LookupRuntimeVar("x86HasFMA")               // bool
 	ir.Syms.X86HasPOPCNT = typecheck.LookupRuntimeVar("x86HasPOPCNT")         // bool
 	ir.Syms.X86HasSSE41 = typecheck.LookupRuntimeVar("x86HasSSE41")           // bool
-	ir.Syms.X86HasFMA = typecheck.LookupRuntimeVar("x86HasFMA")               // bool
 	ir.Syms.ARMHasVFPv4 = typecheck.LookupRuntimeVar("armHasVFPv4")           // bool
 	ir.Syms.ARM64HasATOMICS = typecheck.LookupRuntimeVar("arm64HasATOMICS")   // bool
 	ir.Syms.Loong64HasLAMCAS = typecheck.LookupRuntimeVar("loong64HasLAMCAS") // bool
@@ -7714,4 +7715,3 @@ func isStructNotSIMD(t *types.Type) bool {
 }
 
 var BoundsCheckFunc [ssa.BoundsKindCount]*obj.LSym
-
diff --git a/src/cmd/compile/internal/typecheck/_builtin/runtime.go b/src/cmd/compile/internal/typecheck/_builtin/runtime.go
index 296bfdc281d..1e4d0b7db6e 100644
--- a/src/cmd/compile/internal/typecheck/_builtin/runtime.go
+++ b/src/cmd/compile/internal/typecheck/_builtin/runtime.go
@@ -284,9 +284,10 @@ func libfuzzerHookEqualFold(string, string, uint)
 func addCovMeta(p unsafe.Pointer, len uint32, hash [16]byte, pkpath string, pkgId int, cmode uint8, cgran uint8) uint32
 
 // architecture variants
+var x86HasAVX bool
+var x86HasFMA bool
 var x86HasPOPCNT bool
 var x86HasSSE41 bool
-var x86HasFMA bool
 var armHasVFPv4 bool
 var arm64HasATOMICS bool
 var loong64HasLAMCAS bool
diff --git a/src/cmd/compile/internal/typecheck/builtin.go b/src/cmd/compile/internal/typecheck/builtin.go
index 535f0fb7e88..6b8c6d7bad5 100644
--- a/src/cmd/compile/internal/typecheck/builtin.go
+++ b/src/cmd/compile/internal/typecheck/builtin.go
@@ -232,9 +232,10 @@ var runtimeDecls = [...]struct {
 	{"libfuzzerHookStrCmp", funcTag, 155},
 	{"libfuzzerHookEqualFold", funcTag, 155},
 	{"addCovMeta", funcTag, 157},
+	{"x86HasAVX", varTag, 6},
+	{"x86HasFMA", varTag, 6},
 	{"x86HasPOPCNT", varTag, 6},
 	{"x86HasSSE41", varTag, 6},
-	{"x86HasFMA", varTag, 6},
 	{"armHasVFPv4", varTag, 6},
 	{"arm64HasATOMICS", varTag, 6},
 	{"loong64HasLAMCAS", varTag, 6},
diff --git a/src/runtime/asm_amd64.s b/src/runtime/asm_amd64.s
index cf1d49a4ad8..f8ebd030b61 100644
--- a/src/runtime/asm_amd64.s
+++ b/src/runtime/asm_amd64.s
@@ -1015,6 +1015,9 @@ needm:
 	// there's no need to handle that. Clear R14 so that there's
 	// a bad value in there, in case needm tries to use it.
 	XORPS	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 	XORQ	R14, R14
 	MOVQ	$runtime·needAndBindM(SB), AX
 	CALL	AX
@@ -1712,6 +1715,9 @@ TEXT ·sigpanic0(SB),NOSPLIT,$0-0
 	get_tls(R14)
 	MOVQ	g(R14), R14
 	XORPS	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 	JMP	·sigpanic(SB)
 
 // gcWriteBarrier informs the GC about heap pointer writes.
diff --git a/src/runtime/cpuflags.go b/src/runtime/cpuflags.go
index 6452364b68e..67ed081ef6d 100644
--- a/src/runtime/cpuflags.go
+++ b/src/runtime/cpuflags.go
@@ -28,9 +28,10 @@ const (
 var (
 	// Set in runtime.cpuinit.
 	// TODO: deprecate these; use internal/cpu directly.
+	x86HasAVX    bool
+	x86HasFMA    bool
 	x86HasPOPCNT bool
 	x86HasSSE41  bool
-	x86HasFMA    bool
 
 	armHasVFPv4 bool
 
diff --git a/src/runtime/proc.go b/src/runtime/proc.go
index 68647d771fe..1d597d59c2f 100644
--- a/src/runtime/proc.go
+++ b/src/runtime/proc.go
@@ -766,9 +766,10 @@ func cpuinit(env string) {
 	// to guard execution of instructions that can not be assumed to be always supported.
 	switch GOARCH {
 	case "386", "amd64":
+		x86HasAVX = cpu.X86.HasAVX
+		x86HasFMA = cpu.X86.HasFMA
 		x86HasPOPCNT = cpu.X86.HasPOPCNT
 		x86HasSSE41 = cpu.X86.HasSSE41
-		x86HasFMA = cpu.X86.HasFMA
 
 	case "arm":
 		armHasVFPv4 = cpu.ARM.HasVFPv4
diff --git a/src/runtime/race_amd64.s b/src/runtime/race_amd64.s
index e19118bd54e..23f2e59e3d4 100644
--- a/src/runtime/race_amd64.s
+++ b/src/runtime/race_amd64.s
@@ -456,6 +456,9 @@ call:
 	// Back to Go world, set special registers.
 	// The g register (R14) is preserved in C.
 	XORPS	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 	RET
 
 // C->Go callback thunk that allows to call runtime·racesymbolize from C code.
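
Note on the compiler-side change above: zeroX15 can point the JNE at the SSE fallback because opregreg returns the *obj.Prog it emits, and jmp.To.SetTarget(sse) makes that XORPS the branch target, so on pre-v3 targets exactly one of the two zeroing instructions executes. For context, the existing opregreg helper in src/cmd/compile/internal/amd64/ssa.go looks roughly like this (an approximate paraphrase for orientation, not part of this diff):

	// opregreg emits instructions for op dest, src and returns the
	// resulting Prog, which callers may use as a branch target.
	func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
		p := s.Prog(op)
		p.From.Type = obj.TYPE_REG
		p.To.Type = obj.TYPE_REG
		p.To.Reg = dest
		p.From.Reg = src
		return p
	}
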
diff --git a/src/runtime/sys_darwin_amd64.s b/src/runtime/sys_darwin_amd64.s
index cc4e52d305a..0091546f204 100644
--- a/src/runtime/sys_darwin_amd64.s
+++ b/src/runtime/sys_darwin_amd64.s
@@ -177,6 +177,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_dragonfly_amd64.s b/src/runtime/sys_dragonfly_amd64.s
index a223c2cf76b..84bf326aad3 100644
--- a/src/runtime/sys_dragonfly_amd64.s
+++ b/src/runtime/sys_dragonfly_amd64.s
@@ -228,6 +228,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_freebsd_amd64.s b/src/runtime/sys_freebsd_amd64.s
index 977ea093d24..a1fa3a6fa29 100644
--- a/src/runtime/sys_freebsd_amd64.s
+++ b/src/runtime/sys_freebsd_amd64.s
@@ -265,6 +265,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
@@ -290,6 +293,9 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_linux_amd64.s b/src/runtime/sys_linux_amd64.s
index 941f70b0e8e..02505c2fb0a 100644
--- a/src/runtime/sys_linux_amd64.s
+++ b/src/runtime/sys_linux_amd64.s
@@ -340,6 +340,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
@@ -365,6 +368,9 @@ TEXT runtime·sigprofNonGoWrapper<>(SB),NOSPLIT|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_netbsd_amd64.s b/src/runtime/sys_netbsd_amd64.s
index 2f1ddcdc897..edc7f3d6ee0 100644
--- a/src/runtime/sys_netbsd_amd64.s
+++ b/src/runtime/sys_netbsd_amd64.s
@@ -310,6 +310,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_openbsd_amd64.s b/src/runtime/sys_openbsd_amd64.s
index ff0bc2416aa..734dfe6478e 100644
--- a/src/runtime/sys_openbsd_amd64.s
+++ b/src/runtime/sys_openbsd_amd64.s
@@ -64,6 +64,9 @@ TEXT runtime·sigtramp(SB),NOSPLIT|TOPFRAME|NOFRAME,$0
 	get_tls(R12)
 	MOVQ	g(R12), R14
 	PXOR	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 
 	// Reserve space for spill slots.
 	NOP	SP		// disable vet stack checking
diff --git a/src/runtime/sys_windows_amd64.s b/src/runtime/sys_windows_amd64.s
index e438599910f..b0b4d3cce65 100644
--- a/src/runtime/sys_windows_amd64.s
+++ b/src/runtime/sys_windows_amd64.s
@@ -32,6 +32,9 @@ TEXT sigtramp<>(SB),NOSPLIT,$0-0
 	// R14 is cleared in case there's a non-zero value in there
 	// if called from a non-go thread.
 	XORPS	X15, X15
+	CMPB	internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
+	JNE	2(PC)
+	VXORPS	X15, X15, X15
 	XORQ	R14, R14
 
 	get_tls(AX)
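
The guard added at each trampoline reads the runtime's new x86HasAVX flag, which cpuinit copies from internal/cpu.X86.HasAVX: when AVX is present, VXORPS X15, X15, X15 clears the entire vector register (including the bits above 127 that XORPS leaves untouched); without AVX those upper bits do not exist architecturally, so the plain XORPS already done is sufficient and the VXORPS is skipped. Outside the runtime the same feature bit is exposed by golang.org/x/sys/cpu; below is a minimal sketch for checking which path a given machine would take (the package and field are real, the program itself is illustrative):

	package main

	import (
		"fmt"

		"golang.org/x/sys/cpu"
	)

	func main() {
		// Mirrors the CMPB internal∕cpu·X86+const_offsetX86HasAVX(SB), $1
		// check that the generated code and trampolines perform.
		if cpu.X86.HasAVX {
			fmt.Println("AVX available: VXORPS path, full register cleared")
		} else {
			fmt.Println("no AVX: XORPS path, low 128 bits cleared")
		}
	}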