diff --git a/src/runtime/malloc.go b/src/runtime/malloc.go
index 73d663f7f5..ba9e02e976 100644
--- a/src/runtime/malloc.go
+++ b/src/runtime/malloc.go
@@ -1544,12 +1544,13 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin
 	size = span.elemsize
 	x := unsafe.Pointer(span.base())
 
-	// Ensure that the stores above that initialize x to
-	// type-safe memory and set the heap bits occur before
-	// the caller can make x observable to the garbage
-	// collector. Otherwise, on weakly ordered machines,
-	// the garbage collector could follow a pointer to x,
-	// but see uninitialized memory or stale heap bits.
+	// Ensure that the store above that sets largeType to
+	// nil happens before the caller can make x observable
+	// to the garbage collector.
+	//
+	// Otherwise, on weakly ordered machines, the garbage
+	// collector could follow a pointer to x, but see a stale
+	// largeType value.
 	publicationBarrier()
 	// As x and the heap bits are initialized, update
 	// freeIndexForScan now so x is seen by the GC
@@ -1592,22 +1593,33 @@ func mallocgcLarge(size uintptr, typ *_type, needzero bool) (unsafe.Pointer, uin
 	}
 
 	// Objects can be zeroed late in a context where preemption can occur.
-	// If the object contains pointers, its pointer data must be cleared
-	// or otherwise indicate that the GC shouldn't scan it.
+	//
 	// x will keep the memory alive.
-	if noscan := typ == nil || !typ.Pointers(); !noscan || (needzero && span.needzero != 0) {
+	if needzero && span.needzero != 0 {
 		// N.B. size == fullSize always in this case.
 		memclrNoHeapPointersChunked(size, x) // This is a possible preemption point: see #47302
-
-		// Finish storing the type information for this case.
-		mp := acquirem()
-		if !noscan {
-			getMCache(mp).scanAlloc += heapSetTypeLarge(uintptr(x), size, typ, span)
-		}
-		// Publish the object with the now-zeroed memory.
-		publicationBarrier()
-		releasem(mp)
 	}
+
+	// Set the type and run the publication barrier while non-preemptible. We need to make
+	// sure that between heapSetTypeLarge and publicationBarrier we cannot get preempted,
+	// otherwise the GC could potentially observe non-zeroed memory with largeType set on weak
+	// memory architectures.
+	//
+	// The GC can also potentially observe non-zeroed memory if conservative scanning spuriously
+	// observes a partially-allocated object; see the freeIndexForScan update above. This case is
+	// handled by synchronization inside heapSetTypeLarge.
+	mp = acquirem()
+	if typ != nil && typ.Pointers() {
+		// Finish storing the type information, now that we're certain the memory is zeroed.
+		getMCache(mp).scanAlloc += heapSetTypeLarge(uintptr(x), size, typ, span)
+	}
+	// Publish the object again, now with zeroed memory and initialized type information.
+	//
+	// Even if we didn't update any type information, this is necessary to ensure that, for example,
+	// x written to a global without any synchronization still results in other goroutines observing
+	// zeroed memory.
+	publicationBarrier()
+	releasem(mp)
 	return x, size
 }
 
diff --git a/src/runtime/mbitmap.go b/src/runtime/mbitmap.go
index 148b2d788e..4bffabece2 100644
--- a/src/runtime/mbitmap.go
+++ b/src/runtime/mbitmap.go
@@ -191,7 +191,9 @@ func (span *mspan) typePointersOfUnchecked(addr uintptr) typePointers {
 		typ = *(**_type)(unsafe.Pointer(addr))
 		addr += mallocHeaderSize
 	} else {
-		typ = span.largeType
+		// Synchronize with allocator, in case this came from the conservative scanner.
+		// See heapSetTypeLarge for more details.
+		typ = (*_type)(atomic.Loadp(unsafe.Pointer(&span.largeType)))
 		if typ == nil {
 			// Allow a nil type here for delayed zeroing. See mallocgc.
 			return typePointers{}
@@ -739,8 +741,56 @@ func heapSetTypeSmallHeader(x, dataSize uintptr, typ *_type, header **_type, spa
 func heapSetTypeLarge(x, dataSize uintptr, typ *_type, span *mspan) uintptr {
 	gctyp := typ
-	// Write out the header.
-	span.largeType = gctyp
+	// Write out the header atomically to synchronize with the garbage collector.
+	//
+	// This atomic store is paired with an atomic load in typePointersOfUnchecked.
+	// This store ensures that initializing x's memory cannot be reordered after
+	// this store. Meanwhile the load in typePointersOfUnchecked ensures that
+	// reading x's memory cannot be reordered before largeType is loaded. Together,
+	// these two operations guarantee that the garbage collector can only see
+	// initialized memory if largeType is non-nil.
+	//
+	// Gory details below...
+	//
+	// Ignoring conservative scanning for a moment, this store need not be atomic
+	// if we have a publication barrier on our side. This is because the garbage
+	// collector cannot observe x unless:
+	// 1. It stops this goroutine and scans its stack, or
+	// 2. We return from mallocgc and publish the pointer somewhere.
+	// Either case requires a write on our side, followed by some synchronization
+	// followed by a read by the garbage collector.
+	//
+	// In case (1), the garbage collector can only observe a nil largeType, since it
+	// had to stop our goroutine when it was preemptible during zeroing. For the
+	// duration of the zeroing, largeType is nil and the object has nothing interesting
+	// for the garbage collector to look at, so the garbage collector will not access
+	// the object at all.
+	//
+	// In case (2), the garbage collector can also observe a nil largeType. This
+	// might happen if the object was newly allocated, and a new GC cycle didn't start
+	// (that would require a global barrier, STW). In this case, the garbage collector
+	// will once again ignore the object, and that's safe because objects are
+	// allocate-black.
+	//
+	// However, the garbage collector can also observe a non-nil largeType in case (2).
+	// This is still okay, since to access the object's memory, it must have first
+	// loaded the object's pointer from somewhere. This makes the access of the object's
+	// memory a data-dependent load, and our publication barrier in the allocator
+	// guarantees that a data-dependent load must observe a version of the object's
+	// data from after the publication barrier executed.
+	//
+	// Unfortunately, conservative scanning is a problem. There's no guarantee of a
+	// data dependency as in case (2) because conservative scanning can produce pointers
+	// 'out of thin air' in that it need not have been written somewhere by the allocating
+	// thread first. It might not even be a pointer, or it could be a pointer written to
+	// some stack location long ago. This is the fundamental reason why we need
+	// explicit synchronization somewhere in this whole mess. We choose to put that
+	// synchronization on largeType.
+	//
+	// As described at the very top, treating largeType as an atomic variable, on
+	// both the reader and writer side, is sufficient to ensure that only initialized
+	// memory at x will be observed if largeType is non-nil.
+	atomic.StorepNoWB(unsafe.Pointer(&span.largeType), unsafe.Pointer(gctyp))
 	if doubleCheckHeapSetType {
 		doubleCheckHeapType(x, dataSize, typ, &span.largeType, span)
 	}
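
To make the ordering argument above concrete outside the runtime, here is a minimal, hypothetical sketch of the same handshake built on the public sync/atomic package. atomic.Pointer's Load/Store stand in for the runtime-internal atomic.Loadp/atomic.StorepNoWB plus publicationBarrier (the public API's ordering is, if anything, stronger than what the runtime needs here); the span, typeInfo, allocate, and scan names are invented for illustration and are not the runtime's.

// demo.go — hypothetical userspace analogue of the largeType handshake.
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

type typeInfo struct{ name string } // stands in for *_type

// span stands in for an mspan: memory plus a type descriptor that stays
// nil until the memory is fully initialized ("delayed zeroing").
type span struct {
	data      []byte
	largeType atomic.Pointer[typeInfo] // nil => not ready, skip scanning
}

// allocate mirrors mallocgcLarge + heapSetTypeLarge: initialize the memory
// first, then publish the type. The atomic store is the synchronization
// point: a scanner that observes a non-nil type must also observe the
// initialized memory.
func (s *span) allocate(t *typeInfo) {
	for i := range s.data {
		s.data[i] = 0xAA // plays memclrNoHeapPointersChunked / init to type-safe memory
	}
	// Analogue of atomic.StorepNoWB(&span.largeType, t): everything above
	// happens-before any load that observes t.
	s.largeType.Store(t)
}

// scan mirrors typePointersOfUnchecked on the conservative-scanning path:
// load the type atomically; nil means the object is mid-initialization and
// carries nothing to scan (objects are allocate-black).
func (s *span) scan() {
	t := s.largeType.Load() // analogue of atomic.Loadp(&span.largeType)
	if t == nil {
		return // allow a nil type here for delayed zeroing
	}
	for _, b := range s.data {
		if b != 0xAA {
			panic("saw non-nil largeType but uninitialized memory")
		}
	}
	fmt.Println("scanned", t.name, "and saw only initialized memory")
}

func main() {
	s := &span{data: make([]byte, 1<<16)}
	var wg sync.WaitGroup
	wg.Add(2)
	go func() { defer wg.Done(); s.allocate(&typeInfo{name: "T"}) }()
	go func() { defer wg.Done(); s.scan() }() // plays the conservative scanner
	wg.Wait()
}

If largeType were a plain, non-atomic field in this sketch, a weakly ordered machine could let the scanner observe a non-nil type while some of the writer's initializing stores were still invisible, which is exactly the conservative-scanning race the patch closes by pairing atomic.StorepNoWB with atomic.Loadp.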