diff --git a/src/runtime/export_test.go b/src/runtime/export_test.go index 961c1c1a268..81200da9158 100644 --- a/src/runtime/export_test.go +++ b/src/runtime/export_test.go @@ -1233,7 +1233,9 @@ func (th *TimeHistogram) Record(duration int64) { func SetIntArgRegs(a int) int { lock(&finlock) old := intArgRegs - intArgRegs = a + if a >= 0 { + intArgRegs = a + } unlock(&finlock) return old } diff --git a/src/runtime/syscall_windows.go b/src/runtime/syscall_windows.go index 191892f047c..6521bb2c418 100644 --- a/src/runtime/syscall_windows.go +++ b/src/runtime/syscall_windows.go @@ -22,30 +22,178 @@ var cbs struct { type winCallback struct { fn *funcval // Go function retPop uintptr // For 386 cdecl, how many bytes to pop on return + abiMap abiDesc +} + +// abiPartKind is the action an abiPart should take. +type abiPartKind int + +const ( + abiPartBad abiPartKind = iota + abiPartStack // Move a value from memory to the stack. + abiPartReg // Move a value from memory to a register. +) + +// abiPart encodes a step in translating between calling ABIs. +type abiPart struct { + kind abiPartKind + srcStackOffset uintptr + dstStackOffset uintptr // used if kind == abiPartStack + dstRegister int // used if kind == abiPartReg + len uintptr +} + +func (a *abiPart) tryMerge(b abiPart) bool { + if a.kind != abiPartStack || b.kind != abiPartStack { + return false + } + if a.srcStackOffset+a.len == b.srcStackOffset && a.dstStackOffset+a.len == b.dstStackOffset { + a.len += b.len + return true + } + return false +} + +// abiDesc specifies how to translate from a C frame to a Go +// frame. This does not specify how to translate back because +// the result is always a uintptr. If the C ABI is fastcall, +// this assumes the four fastcall registers were first spilled +// to the shadow space. +type abiDesc struct { + parts []abiPart + + srcStackSize uintptr // stdcall/fastcall stack space tracking + dstStackSize uintptr // Go stack space used + dstRegisters int // Go ABI int argument registers used - // abiMap specifies how to translate from a C frame to a Go - // frame. This does not specify how to translate back because - // the result is always a uintptr. If the C ABI is fastcall, - // this assumes the four fastcall registers were first spilled - // to the shadow space. - abiMap []abiPart // retOffset is the offset of the uintptr-sized result in the Go // frame. retOffset uintptr } -// abiPart encodes a step in translating between calling ABIs. -type abiPart struct { - src, dst uintptr - len uintptr +func (p *abiDesc) assignArg(t *_type) { + if t.size > sys.PtrSize { + // We don't support this right now. In + // stdcall/cdecl, 64-bit ints and doubles are + // passed as two words (little endian); and + // structs are pushed on the stack. In + // fastcall, arguments larger than the word + // size are passed by reference. On arm, + // 8-byte aligned arguments round up to the + // next even register and can be split across + // registers and the stack. + panic("compileCallback: argument size is larger than uintptr") + } + if k := t.kind & kindMask; GOARCH != "386" && (k == kindFloat32 || k == kindFloat64) { + // In fastcall, floating-point arguments in + // the first four positions are passed in + // floating-point registers, which we don't + // currently spill. arm passes floating-point + // arguments in VFP registers, which we also + // don't support. + // So basically we only support 386. + panic("compileCallback: float arguments not supported") + } + + if t.size == 0 { + // The Go ABI aligns for zero-sized types. + p.dstStackSize = alignUp(p.dstStackSize, uintptr(t.align)) + return + } + + // In the C ABI, we're already on a word boundary. + // Also, sub-word-sized fastcall register arguments + // are stored to the least-significant bytes of the + // argument word and all supported Windows + // architectures are little endian, so srcStackOffset + // is already pointing to the right place for smaller + // arguments. The same is true on arm. + + oldParts := p.parts + if !p.tryRegAssignArg(t, 0) { + // Register assignment failed. + // Undo the work and stack assign. + p.parts = oldParts + + // The Go ABI aligns arguments. + p.dstStackSize = alignUp(p.dstStackSize, uintptr(t.align)) + + // Copy just the size of the argument. Note that this + // could be a small by-value struct, but C and Go + // struct layouts are compatible, so we can copy these + // directly, too. + part := abiPart{ + kind: abiPartStack, + srcStackOffset: p.srcStackSize, + dstStackOffset: p.dstStackSize, + len: t.size, + } + // Add this step to the adapter. + if len(p.parts) == 0 || !p.parts[len(p.parts)-1].tryMerge(part) { + p.parts = append(p.parts, part) + } + // The Go ABI packs arguments. + p.dstStackSize += t.size + } + + // cdecl, stdcall, fastcall, and arm pad arguments to word size. + // TODO(rsc): On arm and arm64 do we need to skip the caller's saved LR? + p.srcStackSize += sys.PtrSize } -func (a *abiPart) tryMerge(b abiPart) bool { - if a.src+a.len == b.src && a.dst+a.len == b.dst { - a.len += b.len +// tryRegAssignArg tries to register-assign a value of type t. +// If this type is nested in an aggregate type, then offset is the +// offset of this type within its parent type. +// Assumes t.size <= sys.PtrSize and t.size != 0. +// +// Returns whether the assignment succeeded. +func (p *abiDesc) tryRegAssignArg(t *_type, offset uintptr) bool { + switch k := t.kind & kindMask; k { + case kindBool, kindInt, kindInt8, kindInt16, kindInt32, kindUint, kindUint8, kindUint16, kindUint32, kindUintptr, kindPtr, kindUnsafePointer: + // Assign a register for all these types. + return p.assignReg(t.size, offset) + case kindInt64, kindUint64: + // Only register-assign if the registers are big enough. + if sys.PtrSize == 8 { + return p.assignReg(t.size, offset) + } + case kindArray: + at := (*arraytype)(unsafe.Pointer(t)) + if at.len == 1 { + return p.tryRegAssignArg(at.elem, offset) + } + case kindStruct: + st := (*structtype)(unsafe.Pointer(t)) + for i := range st.fields { + f := &st.fields[i] + if !p.tryRegAssignArg(f.typ, offset+f.offset()) { + return false + } + } return true } - return false + // Pointer-sized types such as maps and channels are currently + // not supported. + panic("compileCallabck: type " + t.string() + " is currently not supported for use in system callbacks") +} + +// assignReg attempts to assign a single register for an +// argument with the given size, at the given offset into the +// value in the C ABI space. +// +// Returns whether the assignment was successful. +func (p *abiDesc) assignReg(size, offset uintptr) bool { + if p.dstRegisters >= intArgRegs { + return false + } + p.parts = append(p.parts, abiPart{ + kind: abiPartReg, + srcStackOffset: p.srcStackSize + offset, + dstRegister: p.dstRegisters, + len: size, + }) + p.dstRegisters++ + return true } type winCallbackKey struct { @@ -101,62 +249,14 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { ft := (*functype)(unsafe.Pointer(fn._type)) // Check arguments and construct ABI translation. - var abiMap []abiPart - var src, dst uintptr + var abiMap abiDesc for _, t := range ft.in() { - if t.size > sys.PtrSize { - // We don't support this right now. In - // stdcall/cdecl, 64-bit ints and doubles are - // passed as two words (little endian); and - // structs are pushed on the stack. In - // fastcall, arguments larger than the word - // size are passed by reference. On arm, - // 8-byte aligned arguments round up to the - // next even register and can be split across - // registers and the stack. - panic("compileCallback: argument size is larger than uintptr") - } - if k := t.kind & kindMask; GOARCH != "386" && (k == kindFloat32 || k == kindFloat64) { - // In fastcall, floating-point arguments in - // the first four positions are passed in - // floating-point registers, which we don't - // currently spill. arm passes floating-point - // arguments in VFP registers, which we also - // don't support. - // So basically we only support 386. - panic("compileCallback: float arguments not supported") - } - - // The Go ABI aligns arguments. - dst = alignUp(dst, uintptr(t.align)) - // In the C ABI, we're already on a word boundary. - // Also, sub-word-sized fastcall register arguments - // are stored to the least-significant bytes of the - // argument word and all supported Windows - // architectures are little endian, so src is already - // pointing to the right place for smaller arguments. - // The same is true on arm. - - // Copy just the size of the argument. Note that this - // could be a small by-value struct, but C and Go - // struct layouts are compatible, so we can copy these - // directly, too. - part := abiPart{src, dst, t.size} - // Add this step to the adapter. - if len(abiMap) == 0 || !abiMap[len(abiMap)-1].tryMerge(part) { - abiMap = append(abiMap, part) - } - - // cdecl, stdcall, fastcall, and arm pad arguments to word size. - // TODO(rsc): On arm and arm64 do we need to skip the caller's saved LR? - src += sys.PtrSize - // The Go ABI packs arguments. - dst += t.size + abiMap.assignArg(t) } // The Go ABI aligns the result to the word size. src is // already aligned. - dst = alignUp(dst, sys.PtrSize) - retOffset := dst + abiMap.dstStackSize = alignUp(abiMap.dstStackSize, sys.PtrSize) + abiMap.retOffset = abiMap.dstStackSize if len(ft.out()) != 1 { panic("compileCallback: expected function with one uintptr-sized result") @@ -170,10 +270,14 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // Either way, it's not AX. panic("compileCallback: float results not supported") } - // Make room for the uintptr-sized result. - dst += sys.PtrSize + if intArgRegs == 0 { + // Make room for the uintptr-sized result. + // If there are argument registers, the return value will + // be passed in the first register. + abiMap.dstStackSize += sys.PtrSize + } - if dst > callbackMaxFrame { + if abiMap.dstStackSize > callbackMaxFrame { panic("compileCallback: function argument frame too large") } @@ -181,7 +285,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { // arguments from the C stack. var retPop uintptr if cdecl { - retPop = src + retPop = abiMap.srcStackSize } key := winCallbackKey{(*funcval)(fn.data), cdecl} @@ -203,7 +307,7 @@ func compileCallback(fn eface, cdecl bool) (code uintptr) { unlock(&cbs.lock) throw("too many callback functions") } - c := winCallback{key.fn, retPop, abiMap, retOffset} + c := winCallback{key.fn, retPop, abiMap} cbs.ctxt[n] = c cbs.index[key] = n cbs.n++ @@ -237,22 +341,39 @@ func callbackWrap(a *callbackArgs) { a.retPop = c.retPop // Convert from C to Go ABI. + var regs abi.RegArgs var frame [callbackMaxFrame]byte goArgs := unsafe.Pointer(&frame) - for _, part := range c.abiMap { - memmove(add(goArgs, part.dst), add(a.args, part.src), part.len) + for _, part := range c.abiMap.parts { + switch part.kind { + case abiPartStack: + memmove(add(goArgs, part.dstStackOffset), add(a.args, part.srcStackOffset), part.len) + case abiPartReg: + goReg := unsafe.Pointer(®s.Ints[part.dstRegister]) + memmove(goReg, add(a.args, part.srcStackOffset), part.len) + default: + panic("bad ABI description") + } } // Even though this is copying back results, we can pass a nil // type because those results must not require write barriers. - // - // Pass a dummy RegArgs for now. - // TODO(mknyszek): Pass arguments in registers. - var regs abi.RegArgs - reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.retOffset)+sys.PtrSize, uint32(c.retOffset), uint32(c.retOffset)+sys.PtrSize, ®s) + reflectcall(nil, unsafe.Pointer(c.fn), noescape(goArgs), uint32(c.abiMap.dstStackSize), uint32(c.abiMap.retOffset), uint32(c.abiMap.dstStackSize), ®s) // Extract the result. - a.result = *(*uintptr)(unsafe.Pointer(&frame[c.retOffset])) + // + // There's always exactly one return value, one pointer in size. + // If it's on the stack, then we will have reserved space for it + // at the end of the frame, otherwise it was passed in a register. + if c.abiMap.dstStackSize != c.abiMap.retOffset { + a.result = *(*uintptr)(unsafe.Pointer(&frame[c.abiMap.retOffset])) + } else { + var zero int + // On architectures with no registers, Ints[0] would be a compile error, + // so we use a dynamic index. These architectures will never take this + // branch, so this won't cause a runtime panic. + a.result = regs.Ints[zero] + } } const _LOAD_LIBRARY_SEARCH_SYSTEM32 = 0x00000800 diff --git a/src/runtime/syscall_windows_test.go b/src/runtime/syscall_windows_test.go index fb215b3c319..65ecb4fbf28 100644 --- a/src/runtime/syscall_windows_test.go +++ b/src/runtime/syscall_windows_test.go @@ -7,6 +7,7 @@ package runtime_test import ( "bytes" "fmt" + "internal/abi" "internal/syscall/windows/sysdll" "internal/testenv" "io" @@ -390,6 +391,103 @@ var cbFuncs = []cbFunc{ }}, } +//go:registerparams +func sum2(i1, i2 uintptr) uintptr { + return i1 + i2 +} + +//go:registerparams +func sum3(i1, i2, i3 uintptr) uintptr { + return i1 + i2 + i3 +} + +//go:registerparams +func sum4(i1, i2, i3, i4 uintptr) uintptr { + return i1 + i2 + i3 + i4 +} + +//go:registerparams +func sum5(i1, i2, i3, i4, i5 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 +} + +//go:registerparams +func sum6(i1, i2, i3, i4, i5, i6 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 +} + +//go:registerparams +func sum7(i1, i2, i3, i4, i5, i6, i7 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 +} + +//go:registerparams +func sum8(i1, i2, i3, i4, i5, i6, i7, i8 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 +} + +//go:registerparams +func sum9(i1, i2, i3, i4, i5, i6, i7, i8, i9 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 +} + +//go:registerparams +func sum10(i1, i2, i3, i4, i5, i6, i7, i8, i9, i10 uintptr) uintptr { + return i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9 + i10 +} + +//go:registerparams +func sum9uint8(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) +} + +//go:registerparams +func sum9uint16(i1, i2, i3, i4, i5, i6, i7, i8, i9 uint16) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) +} + +//go:registerparams +func sum9int8(i1, i2, i3, i4, i5, i6, i7, i8, i9 int8) uintptr { + return uintptr(i1 + i2 + i3 + i4 + i5 + i6 + i7 + i8 + i9) +} + +//go:registerparams +func sum5mix(i1 int8, i2 int16, i3 int32, i4, i5 uintptr) uintptr { + return uintptr(i1) + uintptr(i2) + uintptr(i3) + i4 + i5 +} + +//go:registerparams +func sum5andPair(i1, i2, i3, i4, i5 uint8Pair) uintptr { + return uintptr(i1.x + i1.y + i2.x + i2.y + i3.x + i3.y + i4.x + i4.y + i5.x + i5.y) +} + +// TODO(register args): Remove this once we switch to using the register +// calling convention by default, since this is redundant with the existing +// tests. +var cbFuncsRegABI = []cbFunc{ + {sum2}, + {sum3}, + {sum4}, + {sum5}, + {sum6}, + {sum7}, + {sum8}, + {sum9}, + {sum10}, + {sum9uint8}, + {sum9uint16}, + {sum9int8}, + {sum5mix}, + {sum5andPair}, +} + +func getCallbackTestFuncs() []cbFunc { + if regs := runtime.SetIntArgRegs(-1); regs > 0 { + return cbFuncsRegABI + } + return cbFuncs +} + type cbDLL struct { name string buildArgs func(out, src string) []string @@ -406,7 +504,7 @@ func (d *cbDLL) makeSrc(t *testing.T, path string) { #include typedef struct { uint8_t x, y; } uint8Pair_t; `) - for _, cbf := range cbFuncs { + for _, cbf := range getCallbackTestFuncs() { cbf.cSrc(f, false) cbf.cSrc(f, true) } @@ -451,12 +549,15 @@ func TestStdcallAndCDeclCallbacks(t *testing.T) { } defer os.RemoveAll(tmp) + oldRegs := runtime.SetIntArgRegs(abi.IntArgRegs) + defer runtime.SetIntArgRegs(oldRegs) + for _, dll := range cbDLLs { t.Run(dll.name, func(t *testing.T) { dllPath := dll.build(t, tmp) dll := syscall.MustLoadDLL(dllPath) defer dll.Release() - for _, cbf := range cbFuncs { + for _, cbf := range getCallbackTestFuncs() { t.Run(cbf.cName(false), func(t *testing.T) { stdcall := syscall.NewCallback(cbf.goFunc) cbf.testOne(t, dll, false, stdcall)