go/src/syscall/syscall_linux.go

1151 lines
30 KiB
Go
Raw Normal View History

// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Linux system calls.
// This file is compiled as ordinary Go code,
// but it is also input to mksyscall,
// which parses the //sys lines and generates system call stubs.
// Note that sometimes we use a lowercase //sys name and
// wrap it in our own nicer implementation.
package syscall
import (
"internal/itoa"
"unsafe"
)
func rawSyscallNoError(trap, a1, a2, a3 uintptr) (r1, r2 uintptr)
/*
* Wrapped
*/
func Access(path string, mode uint32) (err error) {
return Faccessat(_AT_FDCWD, path, mode, 0)
}
func Chmod(path string, mode uint32) (err error) {
return Fchmodat(_AT_FDCWD, path, mode, 0)
}
func Chown(path string, uid int, gid int) (err error) {
return Fchownat(_AT_FDCWD, path, uid, gid, 0)
}
func Creat(path string, mode uint32) (fd int, err error) {
return Open(path, O_CREAT|O_WRONLY|O_TRUNC, mode)
}
func isGroupMember(gid int) bool {
groups, err := Getgroups()
if err != nil {
return false
}
for _, g := range groups {
if g == gid {
return true
}
}
return false
}
//sys faccessat(dirfd int, path string, mode uint32) (err error)
func Faccessat(dirfd int, path string, mode uint32, flags int) (err error) {
if flags & ^(_AT_SYMLINK_NOFOLLOW|_AT_EACCESS) != 0 {
return EINVAL
}
// The Linux kernel faccessat system call does not take any flags.
// The glibc faccessat implements the flags itself; see
// https://sourceware.org/git/?p=glibc.git;a=blob;f=sysdeps/unix/sysv/linux/faccessat.c;hb=HEAD
// Because people naturally expect syscall.Faccessat to act
// like C faccessat, we do the same.
if flags == 0 {
return faccessat(dirfd, path, mode)
}
var st Stat_t
if err := fstatat(dirfd, path, &st, flags&_AT_SYMLINK_NOFOLLOW); err != nil {
return err
}
mode &= 7
if mode == 0 {
return nil
}
var uid int
if flags&_AT_EACCESS != 0 {
uid = Geteuid()
} else {
uid = Getuid()
}
if uid == 0 {
if mode&1 == 0 {
// Root can read and write any file.
return nil
}
if st.Mode&0111 != 0 {
// Root can execute any file that anybody can execute.
return nil
}
return EACCES
}
var fmode uint32
if uint32(uid) == st.Uid {
fmode = (st.Mode >> 6) & 7
} else {
var gid int
if flags&_AT_EACCESS != 0 {
gid = Getegid()
} else {
gid = Getgid()
}
if uint32(gid) == st.Gid || isGroupMember(gid) {
fmode = (st.Mode >> 3) & 7
} else {
fmode = st.Mode & 7
}
}
if fmode&mode == mode {
return nil
}
return EACCES
}
//sys fchmodat(dirfd int, path string, mode uint32) (err error)
func Fchmodat(dirfd int, path string, mode uint32, flags int) (err error) {
// Linux fchmodat doesn't support the flags parameter. Mimick glibc's behavior
// and check the flags. Otherwise the mode would be applied to the symlink
// destination which is not what the user expects.
if flags&^_AT_SYMLINK_NOFOLLOW != 0 {
return EINVAL
} else if flags&_AT_SYMLINK_NOFOLLOW != 0 {
return EOPNOTSUPP
}
return fchmodat(dirfd, path, mode)
}
//sys linkat(olddirfd int, oldpath string, newdirfd int, newpath string, flags int) (err error)
func Link(oldpath string, newpath string) (err error) {
return linkat(_AT_FDCWD, oldpath, _AT_FDCWD, newpath, 0)
}
func Mkdir(path string, mode uint32) (err error) {
return Mkdirat(_AT_FDCWD, path, mode)
}
func Mknod(path string, mode uint32, dev int) (err error) {
return Mknodat(_AT_FDCWD, path, mode, dev)
}
func Open(path string, mode int, perm uint32) (fd int, err error) {
return openat(_AT_FDCWD, path, mode|O_LARGEFILE, perm)
}
//sys openat(dirfd int, path string, flags int, mode uint32) (fd int, err error)
func Openat(dirfd int, path string, flags int, mode uint32) (fd int, err error) {
return openat(dirfd, path, flags|O_LARGEFILE, mode)
}
func Pipe(p []int) error {
return Pipe2(p, 0)
}
//sysnb pipe2(p *[2]_C_int, flags int) (err error)
func Pipe2(p []int, flags int) error {
if len(p) != 2 {
return EINVAL
}
var pp [2]_C_int
err := pipe2(&pp, flags)
if err == nil {
p[0] = int(pp[0])
p[1] = int(pp[1])
}
return err
}
//sys readlinkat(dirfd int, path string, buf []byte) (n int, err error)
func Readlink(path string, buf []byte) (n int, err error) {
return readlinkat(_AT_FDCWD, path, buf)
}
func Rename(oldpath string, newpath string) (err error) {
return Renameat(_AT_FDCWD, oldpath, _AT_FDCWD, newpath)
}
func Rmdir(path string) error {
return unlinkat(_AT_FDCWD, path, _AT_REMOVEDIR)
}
//sys symlinkat(oldpath string, newdirfd int, newpath string) (err error)
func Symlink(oldpath string, newpath string) (err error) {
return symlinkat(oldpath, _AT_FDCWD, newpath)
}
func Unlink(path string) error {
return unlinkat(_AT_FDCWD, path, 0)
}
//sys unlinkat(dirfd int, path string, flags int) (err error)
func Unlinkat(dirfd int, path string) error {
return unlinkat(dirfd, path, 0)
}
func Utimes(path string, tv []Timeval) (err error) {
if len(tv) != 2 {
return EINVAL
}
return utimes(path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
}
//sys utimensat(dirfd int, path string, times *[2]Timespec, flag int) (err error)
os: Improve the accuracy of os.Chtimes I've been writing some code which involves syncing files (like rsync) and it became apparent that under Linux I could read modification times (os.Lstat) with nanosecond precision but only write them with microsecond precision. This difference in precision is rather annoying when trying to discover whether files need syncing or not! I've patched syscall and os to increases the accuracy of of os.Chtimes for Linux and Windows. This involved exposing the utimensat system call under Linux and a bit of extra code under Windows. I decided not to expose the "at" bit of the system call as it is impossible to replicate under Windows, so the patch adds syscall.Utimens() to all architectures along with a ImplementsUtimens flag. If the utimensat syscall isn't available (utimensat was added to Linux in 2.6.22, Released, 8 July 2007) then it silently falls back to the microsecond accuracy version it uses now. The improved accuracy for Windows should be good for all versions of Windows. Unfortunately Darwin doesn't seem to have a utimensat system call that I could find so I couldn't implement it there. The BSDs do, but since they share their syscall implementation with Darwin I couldn't figure out how to define a syscall for *BSD and not Darwin. I've left this as a TODO in the code. In the process I implemented the missing methods for Timespec under Windows which I needed which just happened to round out the Timespec API for all platforms! ------------------------------------------------------------ Test code: http://play.golang.org/p/1xnGuYOi4b Linux Before (1000 ns precision) $ ./utimetest.linux.before z Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Reading mtime 1344937903123457000: 2012-08-14 10:51:43.123457 +0100 BST Linux After (1 ns precision) $ ./utimetest.linux.after z Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Reading mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Windows Before (1000 ns precision) X:\>utimetest.windows.before.exe c:\Test.txt Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 GMTDT Reading mtime 1344937903123456000: 2012-08-14 10:51:43.123456 +0100 GMTDT Windows After (100 ns precision) X:\>utimetest.windows.after.exe c:\Test.txt Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 GMTDT Reading mtime 1344937903123456700: 2012-08-14 10:51:43.1234567 +0100 GMTDT R=golang-dev, alex.brainman, rsc, bradfitz CC=golang-dev https://golang.org/cl/6905057
2012-12-13 13:02:39 -08:00
func UtimesNano(path string, ts []Timespec) (err error) {
if len(ts) != 2 {
return EINVAL
}
return utimensat(_AT_FDCWD, path, (*[2]Timespec)(unsafe.Pointer(&ts[0])), 0)
os: Improve the accuracy of os.Chtimes I've been writing some code which involves syncing files (like rsync) and it became apparent that under Linux I could read modification times (os.Lstat) with nanosecond precision but only write them with microsecond precision. This difference in precision is rather annoying when trying to discover whether files need syncing or not! I've patched syscall and os to increases the accuracy of of os.Chtimes for Linux and Windows. This involved exposing the utimensat system call under Linux and a bit of extra code under Windows. I decided not to expose the "at" bit of the system call as it is impossible to replicate under Windows, so the patch adds syscall.Utimens() to all architectures along with a ImplementsUtimens flag. If the utimensat syscall isn't available (utimensat was added to Linux in 2.6.22, Released, 8 July 2007) then it silently falls back to the microsecond accuracy version it uses now. The improved accuracy for Windows should be good for all versions of Windows. Unfortunately Darwin doesn't seem to have a utimensat system call that I could find so I couldn't implement it there. The BSDs do, but since they share their syscall implementation with Darwin I couldn't figure out how to define a syscall for *BSD and not Darwin. I've left this as a TODO in the code. In the process I implemented the missing methods for Timespec under Windows which I needed which just happened to round out the Timespec API for all platforms! ------------------------------------------------------------ Test code: http://play.golang.org/p/1xnGuYOi4b Linux Before (1000 ns precision) $ ./utimetest.linux.before z Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Reading mtime 1344937903123457000: 2012-08-14 10:51:43.123457 +0100 BST Linux After (1 ns precision) $ ./utimetest.linux.after z Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Reading mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 BST Windows Before (1000 ns precision) X:\>utimetest.windows.before.exe c:\Test.txt Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 GMTDT Reading mtime 1344937903123456000: 2012-08-14 10:51:43.123456 +0100 GMTDT Windows After (100 ns precision) X:\>utimetest.windows.after.exe c:\Test.txt Setting mtime 1344937903123456789: 2012-08-14 10:51:43.123456789 +0100 GMTDT Reading mtime 1344937903123456700: 2012-08-14 10:51:43.1234567 +0100 GMTDT R=golang-dev, alex.brainman, rsc, bradfitz CC=golang-dev https://golang.org/cl/6905057
2012-12-13 13:02:39 -08:00
}
func Futimesat(dirfd int, path string, tv []Timeval) (err error) {
if len(tv) != 2 {
return EINVAL
}
return futimesat(dirfd, path, (*[2]Timeval)(unsafe.Pointer(&tv[0])))
}
func Futimes(fd int, tv []Timeval) (err error) {
// Believe it or not, this is the best we can do on Linux
// (and is what glibc does).
return Utimes("/proc/self/fd/"+itoa.Itoa(fd), tv)
}
const ImplementsGetwd = true
//sys Getcwd(buf []byte) (n int, err error)
func Getwd() (wd string, err error) {
var buf [PathMax]byte
n, err := Getcwd(buf[0:])
if err != nil {
return "", err
}
// Getcwd returns the number of bytes written to buf, including the NUL.
if n < 1 || n > len(buf) || buf[n-1] != 0 {
return "", EINVAL
}
return string(buf[0 : n-1]), nil
}
func Getgroups() (gids []int, err error) {
n, err := getgroups(0, nil)
if err != nil {
return nil, err
}
if n == 0 {
return nil, nil
}
// Sanity check group count. Max is 1<<16 on Linux.
if n < 0 || n > 1<<20 {
return nil, EINVAL
}
a := make([]_Gid_t, n)
n, err = getgroups(n, &a[0])
if err != nil {
return nil, err
}
gids = make([]int, n)
for i, v := range a[0:n] {
gids[i] = int(v)
}
return
}
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
var cgo_libc_setgroups unsafe.Pointer // non-nil if cgo linked.
func Setgroups(gids []int) (err error) {
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
n := uintptr(len(gids))
if n == 0 {
if cgo_libc_setgroups == nil {
if _, _, e1 := AllThreadsSyscall(_SYS_setgroups, 0, 0, 0); e1 != 0 {
err = errnoErr(e1)
}
return
}
if ret := cgocaller(cgo_libc_setgroups, 0, 0); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
a := make([]_Gid_t, len(gids))
for i, v := range gids {
a[i] = _Gid_t(v)
}
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
if cgo_libc_setgroups == nil {
if _, _, e1 := AllThreadsSyscall(_SYS_setgroups, n, uintptr(unsafe.Pointer(&a[0])), 0); e1 != 0 {
err = errnoErr(e1)
}
return
}
if ret := cgocaller(cgo_libc_setgroups, n, uintptr(unsafe.Pointer(&a[0]))); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
type WaitStatus uint32
// Wait status is 7 bits at bottom, either 0 (exited),
// 0x7F (stopped), or a signal number that caused an exit.
// The 0x80 bit is whether there was a core dump.
// An extra number (exit code, signal causing a stop)
// is in the high bits. At least that's the idea.
// There are various irregularities. For example, the
// "continued" status is 0xFFFF, distinguishing itself
// from stopped via the core dump bit.
const (
mask = 0x7F
core = 0x80
exited = 0x00
stopped = 0x7F
shift = 8
)
func (w WaitStatus) Exited() bool { return w&mask == exited }
func (w WaitStatus) Signaled() bool { return w&mask != stopped && w&mask != exited }
func (w WaitStatus) Stopped() bool { return w&0xFF == stopped }
func (w WaitStatus) Continued() bool { return w == 0xFFFF }
func (w WaitStatus) CoreDump() bool { return w.Signaled() && w&core != 0 }
func (w WaitStatus) ExitStatus() int {
if !w.Exited() {
return -1
}
return int(w>>shift) & 0xFF
}
func (w WaitStatus) Signal() Signal {
if !w.Signaled() {
return -1
}
return Signal(w & mask)
}
func (w WaitStatus) StopSignal() Signal {
if !w.Stopped() {
return -1
}
return Signal(w>>shift) & 0xFF
}
func (w WaitStatus) TrapCause() int {
if w.StopSignal() != SIGTRAP {
return -1
}
return int(w>>shift) >> 8
}
//sys wait4(pid int, wstatus *_C_int, options int, rusage *Rusage) (wpid int, err error)
func Wait4(pid int, wstatus *WaitStatus, options int, rusage *Rusage) (wpid int, err error) {
var status _C_int
wpid, err = wait4(pid, &status, options, rusage)
if wstatus != nil {
*wstatus = WaitStatus(status)
}
return
}
func Mkfifo(path string, mode uint32) (err error) {
return Mknod(path, mode|S_IFIFO, 0)
}
func (sa *SockaddrInet4) sockaddr() (unsafe.Pointer, _Socklen, error) {
if sa.Port < 0 || sa.Port > 0xFFFF {
return nil, 0, EINVAL
}
sa.raw.Family = AF_INET
p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
p[0] = byte(sa.Port >> 8)
p[1] = byte(sa.Port)
sa.raw.Addr = sa.Addr
return unsafe.Pointer(&sa.raw), SizeofSockaddrInet4, nil
}
func (sa *SockaddrInet6) sockaddr() (unsafe.Pointer, _Socklen, error) {
if sa.Port < 0 || sa.Port > 0xFFFF {
return nil, 0, EINVAL
}
sa.raw.Family = AF_INET6
p := (*[2]byte)(unsafe.Pointer(&sa.raw.Port))
p[0] = byte(sa.Port >> 8)
p[1] = byte(sa.Port)
sa.raw.Scope_id = sa.ZoneId
sa.raw.Addr = sa.Addr
return unsafe.Pointer(&sa.raw), SizeofSockaddrInet6, nil
}
func (sa *SockaddrUnix) sockaddr() (unsafe.Pointer, _Socklen, error) {
name := sa.Name
n := len(name)
if n > len(sa.raw.Path) {
return nil, 0, EINVAL
}
if n == len(sa.raw.Path) && name[0] != '@' {
return nil, 0, EINVAL
}
sa.raw.Family = AF_UNIX
for i := 0; i < n; i++ {
sa.raw.Path[i] = int8(name[i])
}
// length is family (uint16), name, NUL.
sl := _Socklen(2)
if n > 0 {
sl += _Socklen(n) + 1
}
if sa.raw.Path[0] == '@' {
sa.raw.Path[0] = 0
// Don't count trailing NUL for abstract address.
sl--
}
return unsafe.Pointer(&sa.raw), sl, nil
}
type SockaddrLinklayer struct {
Protocol uint16
Ifindex int
Hatype uint16
Pkttype uint8
Halen uint8
Addr [8]byte
raw RawSockaddrLinklayer
}
func (sa *SockaddrLinklayer) sockaddr() (unsafe.Pointer, _Socklen, error) {
if sa.Ifindex < 0 || sa.Ifindex > 0x7fffffff {
return nil, 0, EINVAL
}
sa.raw.Family = AF_PACKET
sa.raw.Protocol = sa.Protocol
sa.raw.Ifindex = int32(sa.Ifindex)
sa.raw.Hatype = sa.Hatype
sa.raw.Pkttype = sa.Pkttype
sa.raw.Halen = sa.Halen
sa.raw.Addr = sa.Addr
return unsafe.Pointer(&sa.raw), SizeofSockaddrLinklayer, nil
}
type SockaddrNetlink struct {
Family uint16
Pad uint16
Pid uint32
Groups uint32
raw RawSockaddrNetlink
}
func (sa *SockaddrNetlink) sockaddr() (unsafe.Pointer, _Socklen, error) {
sa.raw.Family = AF_NETLINK
sa.raw.Pad = sa.Pad
sa.raw.Pid = sa.Pid
sa.raw.Groups = sa.Groups
return unsafe.Pointer(&sa.raw), SizeofSockaddrNetlink, nil
}
func anyToSockaddr(rsa *RawSockaddrAny) (Sockaddr, error) {
switch rsa.Addr.Family {
case AF_NETLINK:
pp := (*RawSockaddrNetlink)(unsafe.Pointer(rsa))
sa := new(SockaddrNetlink)
sa.Family = pp.Family
sa.Pad = pp.Pad
sa.Pid = pp.Pid
sa.Groups = pp.Groups
return sa, nil
case AF_PACKET:
pp := (*RawSockaddrLinklayer)(unsafe.Pointer(rsa))
sa := new(SockaddrLinklayer)
sa.Protocol = pp.Protocol
sa.Ifindex = int(pp.Ifindex)
sa.Hatype = pp.Hatype
sa.Pkttype = pp.Pkttype
sa.Halen = pp.Halen
sa.Addr = pp.Addr
return sa, nil
case AF_UNIX:
pp := (*RawSockaddrUnix)(unsafe.Pointer(rsa))
sa := new(SockaddrUnix)
if pp.Path[0] == 0 {
// "Abstract" Unix domain socket.
// Rewrite leading NUL as @ for textual display.
// (This is the standard convention.)
// Not friendly to overwrite in place,
// but the callers below don't care.
pp.Path[0] = '@'
}
// Assume path ends at NUL.
// This is not technically the Linux semantics for
// abstract Unix domain sockets--they are supposed
// to be uninterpreted fixed-size binary blobs--but
// everyone uses this convention.
n := 0
for n < len(pp.Path) && pp.Path[n] != 0 {
n++
}
bytes := (*[len(pp.Path)]byte)(unsafe.Pointer(&pp.Path[0]))[0:n]
sa.Name = string(bytes)
return sa, nil
case AF_INET:
pp := (*RawSockaddrInet4)(unsafe.Pointer(rsa))
sa := new(SockaddrInet4)
p := (*[2]byte)(unsafe.Pointer(&pp.Port))
sa.Port = int(p[0])<<8 + int(p[1])
sa.Addr = pp.Addr
return sa, nil
case AF_INET6:
pp := (*RawSockaddrInet6)(unsafe.Pointer(rsa))
sa := new(SockaddrInet6)
p := (*[2]byte)(unsafe.Pointer(&pp.Port))
sa.Port = int(p[0])<<8 + int(p[1])
sa.ZoneId = pp.Scope_id
sa.Addr = pp.Addr
return sa, nil
}
return nil, EAFNOSUPPORT
}
func Accept(fd int) (nfd int, sa Sockaddr, err error) {
var rsa RawSockaddrAny
var len _Socklen = SizeofSockaddrAny
nfd, err = accept4(fd, &rsa, &len, 0)
if err != nil {
return
}
sa, err = anyToSockaddr(&rsa)
if err != nil {
Close(nfd)
nfd = 0
}
return
}
func Accept4(fd int, flags int) (nfd int, sa Sockaddr, err error) {
var rsa RawSockaddrAny
var len _Socklen = SizeofSockaddrAny
nfd, err = accept4(fd, &rsa, &len, flags)
if err != nil {
return
}
if len > SizeofSockaddrAny {
panic("RawSockaddrAny too small")
}
sa, err = anyToSockaddr(&rsa)
if err != nil {
Close(nfd)
nfd = 0
}
return
}
func Getsockname(fd int) (sa Sockaddr, err error) {
var rsa RawSockaddrAny
var len _Socklen = SizeofSockaddrAny
if err = getsockname(fd, &rsa, &len); err != nil {
return
}
return anyToSockaddr(&rsa)
}
func GetsockoptInet4Addr(fd, level, opt int) (value [4]byte, err error) {
vallen := _Socklen(4)
err = getsockopt(fd, level, opt, unsafe.Pointer(&value[0]), &vallen)
return value, err
}
func GetsockoptIPMreq(fd, level, opt int) (*IPMreq, error) {
var value IPMreq
vallen := _Socklen(SizeofIPMreq)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func GetsockoptIPMreqn(fd, level, opt int) (*IPMreqn, error) {
var value IPMreqn
vallen := _Socklen(SizeofIPMreqn)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func GetsockoptIPv6Mreq(fd, level, opt int) (*IPv6Mreq, error) {
var value IPv6Mreq
vallen := _Socklen(SizeofIPv6Mreq)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func GetsockoptIPv6MTUInfo(fd, level, opt int) (*IPv6MTUInfo, error) {
var value IPv6MTUInfo
vallen := _Socklen(SizeofIPv6MTUInfo)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func GetsockoptICMPv6Filter(fd, level, opt int) (*ICMPv6Filter, error) {
var value ICMPv6Filter
vallen := _Socklen(SizeofICMPv6Filter)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func GetsockoptUcred(fd, level, opt int) (*Ucred, error) {
var value Ucred
vallen := _Socklen(SizeofUcred)
err := getsockopt(fd, level, opt, unsafe.Pointer(&value), &vallen)
return &value, err
}
func SetsockoptIPMreqn(fd, level, opt int, mreq *IPMreqn) (err error) {
return setsockopt(fd, level, opt, unsafe.Pointer(mreq), unsafe.Sizeof(*mreq))
}
func recvmsgRaw(fd int, p, oob []byte, flags int, rsa *RawSockaddrAny) (n, oobn int, recvflags int, err error) {
var msg Msghdr
msg.Name = (*byte)(unsafe.Pointer(rsa))
msg.Namelen = uint32(SizeofSockaddrAny)
var iov Iovec
if len(p) > 0 {
iov.Base = &p[0]
iov.SetLen(len(p))
}
var dummy byte
if len(oob) > 0 {
if len(p) == 0 {
var sockType int
sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
if err != nil {
return
}
// receive at least one normal byte
if sockType != SOCK_DGRAM {
iov.Base = &dummy
iov.SetLen(1)
}
}
msg.Control = &oob[0]
msg.SetControllen(len(oob))
}
msg.Iov = &iov
msg.Iovlen = 1
if n, err = recvmsg(fd, &msg, flags); err != nil {
return
}
oobn = int(msg.Controllen)
recvflags = int(msg.Flags)
return
}
func sendmsgN(fd int, p, oob []byte, ptr unsafe.Pointer, salen _Socklen, flags int) (n int, err error) {
var msg Msghdr
msg.Name = (*byte)(ptr)
msg.Namelen = uint32(salen)
var iov Iovec
if len(p) > 0 {
iov.Base = &p[0]
iov.SetLen(len(p))
}
var dummy byte
if len(oob) > 0 {
if len(p) == 0 {
var sockType int
sockType, err = GetsockoptInt(fd, SOL_SOCKET, SO_TYPE)
if err != nil {
return 0, err
}
// send at least one normal byte
if sockType != SOCK_DGRAM {
iov.Base = &dummy
iov.SetLen(1)
}
}
msg.Control = &oob[0]
msg.SetControllen(len(oob))
}
msg.Iov = &iov
msg.Iovlen = 1
if n, err = sendmsg(fd, &msg, flags); err != nil {
return 0, err
}
if len(oob) > 0 && len(p) == 0 {
n = 0
}
return n, nil
}
// BindToDevice binds the socket associated with fd to device.
func BindToDevice(fd int, device string) (err error) {
return SetsockoptString(fd, SOL_SOCKET, SO_BINDTODEVICE, device)
}
//sys ptrace(request int, pid int, addr uintptr, data uintptr) (err error)
func ptracePeek(req int, pid int, addr uintptr, out []byte) (count int, err error) {
// The peek requests are machine-size oriented, so we wrap it
// to retrieve arbitrary-length data.
// The ptrace syscall differs from glibc's ptrace.
// Peeks returns the word in *data, not as the return value.
var buf [sizeofPtr]byte
// Leading edge. PEEKTEXT/PEEKDATA don't require aligned
// access (PEEKUSER warns that it might), but if we don't
// align our reads, we might straddle an unmapped page
// boundary and not get the bytes leading up to the page
// boundary.
n := 0
if addr%sizeofPtr != 0 {
err = ptrace(req, pid, addr-addr%sizeofPtr, uintptr(unsafe.Pointer(&buf[0])))
if err != nil {
return 0, err
}
n += copy(out, buf[addr%sizeofPtr:])
out = out[n:]
}
// Remainder.
for len(out) > 0 {
// We use an internal buffer to guarantee alignment.
// It's not documented if this is necessary, but we're paranoid.
err = ptrace(req, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0])))
if err != nil {
return n, err
}
copied := copy(out, buf[0:])
n += copied
out = out[copied:]
}
return n, nil
}
func PtracePeekText(pid int, addr uintptr, out []byte) (count int, err error) {
return ptracePeek(PTRACE_PEEKTEXT, pid, addr, out)
}
func PtracePeekData(pid int, addr uintptr, out []byte) (count int, err error) {
return ptracePeek(PTRACE_PEEKDATA, pid, addr, out)
}
func ptracePoke(pokeReq int, peekReq int, pid int, addr uintptr, data []byte) (count int, err error) {
// As for ptracePeek, we need to align our accesses to deal
// with the possibility of straddling an invalid page.
// Leading edge.
n := 0
if addr%sizeofPtr != 0 {
var buf [sizeofPtr]byte
err = ptrace(peekReq, pid, addr-addr%sizeofPtr, uintptr(unsafe.Pointer(&buf[0])))
if err != nil {
return 0, err
}
n += copy(buf[addr%sizeofPtr:], data)
word := *((*uintptr)(unsafe.Pointer(&buf[0])))
err = ptrace(pokeReq, pid, addr-addr%sizeofPtr, word)
if err != nil {
return 0, err
}
data = data[n:]
}
// Interior.
for len(data) > sizeofPtr {
word := *((*uintptr)(unsafe.Pointer(&data[0])))
err = ptrace(pokeReq, pid, addr+uintptr(n), word)
if err != nil {
return n, err
}
n += sizeofPtr
data = data[sizeofPtr:]
}
// Trailing edge.
if len(data) > 0 {
var buf [sizeofPtr]byte
err = ptrace(peekReq, pid, addr+uintptr(n), uintptr(unsafe.Pointer(&buf[0])))
if err != nil {
return n, err
}
copy(buf[0:], data)
word := *((*uintptr)(unsafe.Pointer(&buf[0])))
err = ptrace(pokeReq, pid, addr+uintptr(n), word)
if err != nil {
return n, err
}
n += len(data)
}
return n, nil
}
func PtracePokeText(pid int, addr uintptr, data []byte) (count int, err error) {
return ptracePoke(PTRACE_POKETEXT, PTRACE_PEEKTEXT, pid, addr, data)
}
func PtracePokeData(pid int, addr uintptr, data []byte) (count int, err error) {
return ptracePoke(PTRACE_POKEDATA, PTRACE_PEEKDATA, pid, addr, data)
}
func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) {
return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout)))
}
func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) {
return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs)))
}
func PtraceSetOptions(pid int, options int) (err error) {
return ptrace(PTRACE_SETOPTIONS, pid, 0, uintptr(options))
}
func PtraceGetEventMsg(pid int) (msg uint, err error) {
var data _C_long
err = ptrace(PTRACE_GETEVENTMSG, pid, 0, uintptr(unsafe.Pointer(&data)))
msg = uint(data)
return
}
func PtraceCont(pid int, signal int) (err error) {
return ptrace(PTRACE_CONT, pid, 0, uintptr(signal))
}
func PtraceSyscall(pid int, signal int) (err error) {
return ptrace(PTRACE_SYSCALL, pid, 0, uintptr(signal))
}
func PtraceSingleStep(pid int) (err error) { return ptrace(PTRACE_SINGLESTEP, pid, 0, 0) }
func PtraceAttach(pid int) (err error) { return ptrace(PTRACE_ATTACH, pid, 0, 0) }
func PtraceDetach(pid int) (err error) { return ptrace(PTRACE_DETACH, pid, 0, 0) }
//sys reboot(magic1 uint, magic2 uint, cmd int, arg string) (err error)
func Reboot(cmd int) (err error) {
return reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, "")
}
func ReadDirent(fd int, buf []byte) (n int, err error) {
return Getdents(fd, buf)
}
func direntIno(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(Dirent{}.Ino), unsafe.Sizeof(Dirent{}.Ino))
}
func direntReclen(buf []byte) (uint64, bool) {
return readInt(buf, unsafe.Offsetof(Dirent{}.Reclen), unsafe.Sizeof(Dirent{}.Reclen))
}
func direntNamlen(buf []byte) (uint64, bool) {
reclen, ok := direntReclen(buf)
if !ok {
return 0, false
}
return reclen - uint64(unsafe.Offsetof(Dirent{}.Name)), true
}
//sys mount(source string, target string, fstype string, flags uintptr, data *byte) (err error)
func Mount(source string, target string, fstype string, flags uintptr, data string) (err error) {
// Certain file systems get rather angry and EINVAL if you give
// them an empty string of data, rather than NULL.
if data == "" {
return mount(source, target, fstype, flags, nil)
}
datap, err := BytePtrFromString(data)
if err != nil {
return err
}
return mount(source, target, fstype, flags, datap)
}
// Sendto
// Recvfrom
// Socketpair
/*
* Direct access
*/
//sys Acct(path string) (err error)
//sys Adjtimex(buf *Timex) (state int, err error)
//sys Chdir(path string) (err error)
//sys Chroot(path string) (err error)
//sys Close(fd int) (err error)
//sys Dup(oldfd int) (fd int, err error)
//sys Dup3(oldfd int, newfd int, flags int) (err error)
//sysnb EpollCreate1(flag int) (fd int, err error)
//sysnb EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error)
//sys Fallocate(fd int, mode uint32, off int64, len int64) (err error)
//sys Fchdir(fd int) (err error)
//sys Fchmod(fd int, mode uint32) (err error)
//sys Fchownat(dirfd int, path string, uid int, gid int, flags int) (err error)
//sys fcntl(fd int, cmd int, arg int) (val int, err error)
//sys Fdatasync(fd int) (err error)
//sys Flock(fd int, how int) (err error)
//sys Fsync(fd int) (err error)
//sys Getdents(fd int, buf []byte) (n int, err error) = SYS_GETDENTS64
//sysnb Getpgid(pid int) (pgid int, err error)
func Getpgrp() (pid int) {
pid, _ = Getpgid(0)
return
}
//sysnb Getpid() (pid int)
//sysnb Getppid() (ppid int)
//sys Getpriority(which int, who int) (prio int, err error)
//sysnb Getrusage(who int, rusage *Rusage) (err error)
//sysnb Gettid() (tid int)
//sys Getxattr(path string, attr string, dest []byte) (sz int, err error)
//sys InotifyAddWatch(fd int, pathname string, mask uint32) (watchdesc int, err error)
//sysnb InotifyInit1(flags int) (fd int, err error)
//sysnb InotifyRmWatch(fd int, watchdesc uint32) (success int, err error)
//sysnb Kill(pid int, sig Signal) (err error)
//sys Klogctl(typ int, buf []byte) (n int, err error) = SYS_SYSLOG
//sys Listxattr(path string, dest []byte) (sz int, err error)
//sys Mkdirat(dirfd int, path string, mode uint32) (err error)
//sys Mknodat(dirfd int, path string, mode uint32, dev int) (err error)
//sys Nanosleep(time *Timespec, leftover *Timespec) (err error)
//sys PivotRoot(newroot string, putold string) (err error) = SYS_PIVOT_ROOT
//sysnb prlimit(pid int, resource int, newlimit *Rlimit, old *Rlimit) (err error) = SYS_PRLIMIT64
//sys read(fd int, p []byte) (n int, err error)
//sys Removexattr(path string, attr string) (err error)
//sys Setdomainname(p []byte) (err error)
//sys Sethostname(p []byte) (err error)
//sysnb Setpgid(pid int, pgid int) (err error)
//sysnb Setsid() (pid int, err error)
//sysnb Settimeofday(tv *Timeval) (err error)
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
// Provided by runtime.syscall_runtime_doAllThreadsSyscall which stops the
// world and invokes the syscall on each OS thread. Once this function returns,
// all threads are in sync.
//go:uintptrescapes
func runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2, err uintptr)
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
// AllThreadsSyscall performs a syscall on each OS thread of the Go
// runtime. It first invokes the syscall on one thread. Should that
// invocation fail, it returns immediately with the error status.
// Otherwise, it invokes the syscall on all of the remaining threads
// in parallel. It will terminate the program if it observes any
// invoked syscall's return value differs from that of the first
// invocation.
//
// AllThreadsSyscall is intended for emulating simultaneous
// process-wide state changes that require consistently modifying
// per-thread state of the Go runtime.
//
// AllThreadsSyscall is unaware of any threads that are launched
// explicitly by cgo linked code, so the function always returns
// ENOTSUP in binaries that use cgo.
//go:uintptrescapes
func AllThreadsSyscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err Errno) {
if cgo_libc_setegid != nil {
return minus1, minus1, ENOTSUP
}
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
r1, r2, errno := runtime_doAllThreadsSyscall(trap, a1, a2, a3, 0, 0, 0)
return r1, r2, Errno(errno)
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
}
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
// AllThreadsSyscall6 is like AllThreadsSyscall, but extended to six
// arguments.
//go:uintptrescapes
func AllThreadsSyscall6(trap, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno) {
if cgo_libc_setegid != nil {
return minus1, minus1, ENOTSUP
}
runtime, syscall: reimplement AllThreadsSyscall using only signals. In issue 50113, we see that a thread blocked in a system call can result in a hang of AllThreadsSyscall. To resolve this, we must send a signal to these threads to knock them out of the system call long enough to run the per-thread syscall. Stepping back, if we need to send signals anyway, it should be possible to implement this entire mechanism on top of signals. This CL does so, vastly simplifying the mechanism, both as a direct result of newly-unnecessary code as well as some ancillary simplifications to make things simpler to follow. Major changes: * The rest of the mechanism is moved to os_linux.go, with fields in mOS instead of m itself. * 'Fixup' fields and functions are renamed to 'perThreadSyscall' so they are more precise about their purpose. * Rather than getting passed a closure, doAllThreadsSyscall takes the syscall number and arguments. This avoids a lot of hairy behavior: * The closure may potentially only be live in fields in the M, hidden from the GC. Not necessary with no closure. * The need to loan out the race context. A direct RawSyscall6 call does not require any race context. * The closure previously conditionally panicked in strange locations, like a signal handler. Now we simply throw. * All manual fixup synchronization with mPark, sysmon, templateThread, sigqueue, etc is gone. The core approach is much simpler: doAllThreadsSyscall sends a signal to every thread in allm, which executes the system call from the signal handler. We use (SIGRTMIN + 1), aka SIGSETXID, the same signal used by glibc for this purpose. As such, we are careful to only handle this signal on non-cgo binaries. Synchronization with thread creation is a key part of this CL. The comment near the top of doAllThreadsSyscall describes the required synchronization semantics and how they are achieved. Note that current use of allocmLock protects the state mutations of allm that are also protected by sched.lock. allocmLock is used instead of sched.lock simply to avoid holding sched.lock for so long. Fixes #50113 Change-Id: Ic7ea856dc66cf711731540a54996e08fc986ce84 Reviewed-on: https://go-review.googlesource.com/c/go/+/383434 Reviewed-by: Austin Clements <austin@google.com> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Michael Pratt <mpratt@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
2022-02-04 17:15:28 -05:00
r1, r2, errno := runtime_doAllThreadsSyscall(trap, a1, a2, a3, a4, a5, a6)
return r1, r2, Errno(errno)
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
}
// linked by runtime.cgocall.go
//go:uintptrescapes
func cgocaller(unsafe.Pointer, ...uintptr) uintptr
var cgo_libc_setegid unsafe.Pointer // non-nil if cgo linked.
const minus1 = ^uintptr(0)
func Setegid(egid int) (err error) {
if cgo_libc_setegid == nil {
if _, _, e1 := AllThreadsSyscall(SYS_SETRESGID, minus1, uintptr(egid), minus1); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setegid, uintptr(egid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_seteuid unsafe.Pointer // non-nil if cgo linked.
func Seteuid(euid int) (err error) {
if cgo_libc_seteuid == nil {
if _, _, e1 := AllThreadsSyscall(SYS_SETRESUID, minus1, uintptr(euid), minus1); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_seteuid, uintptr(euid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
var cgo_libc_setgid unsafe.Pointer // non-nil if cgo linked.
func Setgid(gid int) (err error) {
syscall: support POSIX semantics for Linux syscalls This change adds two new methods for invoking system calls under Linux: syscall.AllThreadsSyscall() and syscall.AllThreadsSyscall6(). These system call wrappers ensure that all OSThreads mirror a common system call. The wrappers serialize execution of the runtime to ensure no race conditions where any Go code observes a non-atomic OS state change. As such, the syscalls have higher runtime overhead than regular system calls, and only need to be used where such thread (or 'm' in the parlance of the runtime sources) consistency is required. The new support is used to enable these functions under Linux: syscall.Setegid(), syscall.Seteuid(), syscall.Setgroups(), syscall.Setgid(), syscall.Setregid(), syscall.Setreuid(), syscall.Setresgid(), syscall.Setresuid() and syscall.Setuid(). They work identically to their glibc counterparts. Extensive discussion of the background issue addressed in this patch can be found here: https://github.com/golang/go/issues/1435 In the case where cgo is used, the C runtime can launch pthreads that are not managed by the Go runtime. As such, the added syscall.AllThreadsSyscall*() return ENOTSUP when cgo is enabled. However, for the 9 syscall.Set*() functions listed above, when cgo is active, these functions redirect to invoke their C.set*() equivalents in glibc, which wraps the raw system calls with a nptl:setxid fixup mechanism. This achieves POSIX semantics for these functions in the combined Go and C runtime. As a side note, the glibc/nptl:setxid support (2019-11-30) does not extend to all security related system calls under Linux so using native Go (CGO_ENABLED=0) and these AllThreadsSyscall*()s, where needed, will yield more well defined/consistent behavior over all threads of a Go program. That is, using the syscall.AllThreadsSyscall*() wrappers for things like setting state through SYS_PRCTL and SYS_CAPSET etc. Fixes #1435 Change-Id: Ib1a3e16b9180f64223196a32fc0f9dce14d9105c Reviewed-on: https://go-review.googlesource.com/c/go/+/210639 Trust: Emmanuel Odeke <emm.odeke@gmail.com> Trust: Ian Lance Taylor <iant@golang.org> Trust: Michael Pratt <mpratt@google.com> Run-TryBot: Emmanuel Odeke <emm.odeke@gmail.com> Reviewed-by: Michael Pratt <mpratt@google.com> Reviewed-by: Austin Clements <austin@google.com>
2019-12-09 21:50:16 -08:00
if cgo_libc_setgid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETGID, uintptr(gid), 0, 0); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setgid, uintptr(gid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_setregid unsafe.Pointer // non-nil if cgo linked.
func Setregid(rgid, egid int) (err error) {
if cgo_libc_setregid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETREGID, uintptr(rgid), uintptr(egid), 0); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setregid, uintptr(rgid), uintptr(egid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_setresgid unsafe.Pointer // non-nil if cgo linked.
func Setresgid(rgid, egid, sgid int) (err error) {
if cgo_libc_setresgid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETRESGID, uintptr(rgid), uintptr(egid), uintptr(sgid)); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setresgid, uintptr(rgid), uintptr(egid), uintptr(sgid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_setresuid unsafe.Pointer // non-nil if cgo linked.
func Setresuid(ruid, euid, suid int) (err error) {
if cgo_libc_setresuid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETRESUID, uintptr(ruid), uintptr(euid), uintptr(suid)); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setresuid, uintptr(ruid), uintptr(euid), uintptr(suid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_setreuid unsafe.Pointer // non-nil if cgo linked.
func Setreuid(ruid, euid int) (err error) {
if cgo_libc_setreuid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETREUID, uintptr(ruid), uintptr(euid), 0); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setreuid, uintptr(ruid), uintptr(euid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
var cgo_libc_setuid unsafe.Pointer // non-nil if cgo linked.
func Setuid(uid int) (err error) {
if cgo_libc_setuid == nil {
if _, _, e1 := AllThreadsSyscall(sys_SETUID, uintptr(uid), 0, 0); e1 != 0 {
err = errnoErr(e1)
}
} else if ret := cgocaller(cgo_libc_setuid, uintptr(uid)); ret != 0 {
err = errnoErr(Errno(ret))
}
return
}
//sys Setpriority(which int, who int, prio int) (err error)
//sys Setxattr(path string, attr string, data []byte, flags int) (err error)
//sys Sync()
//sysnb Sysinfo(info *Sysinfo_t) (err error)
//sys Tee(rfd int, wfd int, len int, flags int) (n int64, err error)
//sysnb Tgkill(tgid int, tid int, sig Signal) (err error)
//sysnb Times(tms *Tms) (ticks uintptr, err error)
//sysnb Umask(mask int) (oldmask int)
//sysnb Uname(buf *Utsname) (err error)
//sys Unmount(target string, flags int) (err error) = SYS_UMOUNT2
//sys Unshare(flags int) (err error)
//sys write(fd int, p []byte) (n int, err error)
//sys exitThread(code int) (err error) = SYS_EXIT
//sys readlen(fd int, p *byte, np int) (n int, err error) = SYS_READ
//sys writelen(fd int, p *byte, np int) (n int, err error) = SYS_WRITE
// mmap varies by architecture; see syscall_linux_*.go.
//sys munmap(addr uintptr, length uintptr) (err error)
var mapper = &mmapper{
active: make(map[*byte][]byte),
mmap: mmap,
munmap: munmap,
}
func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) {
return mapper.Mmap(fd, offset, length, prot, flags)
}
func Munmap(b []byte) (err error) {
return mapper.Munmap(b)
}
//sys Madvise(b []byte, advice int) (err error)
//sys Mprotect(b []byte, prot int) (err error)
//sys Mlock(b []byte) (err error)
//sys Munlock(b []byte) (err error)
//sys Mlockall(flags int) (err error)
//sys Munlockall() (err error)