cmd/compile: move raw writes out of write barrier code

Previously, the write barrier calls themselves did the actual writes to memory. Instead, move those writes out to a common location that both the wb-enabled and wb-disabled code paths share. This enables us to optimize the write barrier path without having to worry about performing the actual writes. Change-Id: Ia71ab651908ec124cc33141afb52e4ca19733ac6 Reviewed-on: https://go-review.googlesource.com/c/go/+/447780 Reviewed-by: Michael Knyszek <mknyszek@google.com> TryBot-Bypass: Keith Randall <khr@golang.org> Run-TryBot: Keith Randall <khr@golang.org> Reviewed-by: Cherry Mui <cherryyz@google.com>
2025-12-08 06:10:04 +00:00 · 2022-11-01 14:18:09 -07:00 · 2022-11-01 14:18:09 -07:00 · d49719b1f7
commit d49719b1f7
parent d3daeb5267
16 changed files with 152 additions and 169 deletions
--- a/src/cmd/compile/internal/ssa/writebarrier.go
+++ b/src/cmd/compile/internal/ssa/writebarrier.go
@ -132,7 +132,8 @@ func writebarrier(f *Func) {
 	}

 	var sb, sp, wbaddr, const0 *Value
-	var typedmemmove, typedmemclr, gcWriteBarrier, cgoCheckPtrWrite, cgoCheckMemmove *obj.LSym
+	var gcWriteBarrier, cgoCheckPtrWrite, cgoCheckMemmove *obj.LSym
+	var wbZero, wbMove *obj.LSym
 	var stores, after []*Value
 	var sset *sparseSet
 	var storeNumber []int32
@ -185,8 +186,8 @@ func writebarrier(f *Func) {
 			wbsym := f.fe.Syslook("writeBarrier")
 			wbaddr = f.Entry.NewValue1A(initpos, OpAddr, f.Config.Types.UInt32Ptr, wbsym, sb)
 			gcWriteBarrier = f.fe.Syslook("gcWriteBarrier")
-			typedmemmove = f.fe.Syslook("typedmemmove")
-			typedmemclr = f.fe.Syslook("typedmemclr")
+			wbZero = f.fe.Syslook("wbZero")
+			wbMove = f.fe.Syslook("wbMove")
 			if buildcfg.Experiment.CgoCheck2 {
 				cgoCheckPtrWrite = f.fe.Syslook("cgoCheckPtrWrite")
 				cgoCheckMemmove = f.fe.Syslook("cgoCheckMemmove")
@ -235,6 +236,51 @@ func writebarrier(f *Func) {
 		// find the memory before the WB stores
 		mem := stores[0].MemoryArg()
 		pos := stores[0].Pos
+
+		// If the source of a MoveWB is volatile (will be clobbered by a
+		// function call), we need to copy it to a temporary location, as
+		// marshaling the args of wbMove might clobber the value we're
+		// trying to move.
+		// Look for volatile source, copy it to temporary before we check
+		// the write barrier flag.
+		// It is unlikely to have more than one of them. Just do a linear
+		// search instead of using a map.
+		// See issue 15854.
+		type volatileCopy struct {
+			src *Value // address of original volatile value
+			tmp *Value // address of temporary we've copied the volatile value into
+		}
+		var volatiles []volatileCopy
+
+		if !(f.ABIDefault == f.ABI1 && len(f.Config.intParamRegs) >= 3) {
+			// We don't need to do this if the calls we're going to do take
+			// all their arguments in registers.
+			// 3 is the magic number because it covers wbZero, wbMove, cgoCheckMemmove.
+		copyLoop:
+			for _, w := range stores {
+				if w.Op == OpMoveWB {
+					val := w.Args[1]
+					if isVolatile(val) {
+						for _, c := range volatiles {
+							if val == c.src {
+								continue copyLoop // already copied
+							}
+						}
+
+						t := val.Type.Elem()
+						tmp := f.fe.Auto(w.Pos, t)
+						mem = b.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, mem)
+						tmpaddr := b.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, mem)
+						siz := t.Size()
+						mem = b.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, mem)
+						mem.Aux = t
+						volatiles = append(volatiles, volatileCopy{val, tmpaddr})
+					}
+				}
+			}
+		}
+
+		// Build branch point.
 		bThen := f.NewBlock(BlockPlain)
 		bElse := f.NewBlock(BlockPlain)
 		bEnd := f.NewBlock(b.Kind)
@ -274,123 +320,86 @@ func writebarrier(f *Func) {
 		bThen.AddEdgeTo(bEnd)
 		bElse.AddEdgeTo(bEnd)

-		// for each write barrier store, append write barrier version to bThen
-		// and simple store version to bElse
+		// then block: emit write barrier calls
 		memThen := mem
-		memElse := mem
-
-		// If the source of a MoveWB is volatile (will be clobbered by a
-		// function call), we need to copy it to a temporary location, as
-		// marshaling the args of typedmemmove might clobber the value we're
-		// trying to move.
-		// Look for volatile source, copy it to temporary before we emit any
-		// call.
-		// It is unlikely to have more than one of them. Just do a linear
-		// search instead of using a map.
-		type volatileCopy struct {
-			src *Value // address of original volatile value
-			tmp *Value // address of temporary we've copied the volatile value into
-		}
-		var volatiles []volatileCopy
-	copyLoop:
 		for _, w := range stores {
-			if w.Op == OpMoveWB {
-				val := w.Args[1]
-				if isVolatile(val) {
-					for _, c := range volatiles {
-						if val == c.src {
-							continue copyLoop // already copied
-						}
-					}
-
-					t := val.Type.Elem()
-					tmp := f.fe.Auto(w.Pos, t)
-					memThen = bThen.NewValue1A(w.Pos, OpVarDef, types.TypeMem, tmp, memThen)
-					tmpaddr := bThen.NewValue2A(w.Pos, OpLocalAddr, t.PtrTo(), tmp, sp, memThen)
-					siz := t.Size()
-					memThen = bThen.NewValue3I(w.Pos, OpMove, types.TypeMem, siz, tmpaddr, val, memThen)
-					memThen.Aux = t
-					volatiles = append(volatiles, volatileCopy{val, tmpaddr})
-				}
-			}
-		}
-
-		for _, w := range stores {
-			ptr := w.Args[0]
 			pos := w.Pos
-
-			var fn *obj.LSym
-			var typ *obj.LSym
-			var val *Value
 			switch w.Op {
 			case OpStoreWB:
-				val = w.Args[1]
-				nWBops--
-			case OpMoveWB:
-				fn = typedmemmove
-				val = w.Args[1]
-				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
+				ptr := w.Args[0]
+				val := w.Args[1]
+				memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
+				f.fe.SetWBPos(pos)
 				nWBops--
 			case OpZeroWB:
-				fn = typedmemclr
-				typ = reflectdata.TypeLinksym(w.Aux.(*types.Type))
+				dst := w.Args[0]
+				typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
+				// wbZero(&typ, dst)
+				taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
+				memThen = wbcall(pos, bThen, wbZero, sp, memThen, taddr, dst)
+				f.fe.SetWBPos(pos)
 				nWBops--
-			case OpVarDef, OpVarLive:
-			}
-
-			// then block: emit write barrier call
-			switch w.Op {
-			case OpStoreWB, OpMoveWB, OpZeroWB:
-				if w.Op == OpStoreWB {
-					if buildcfg.Experiment.CgoCheck2 {
-						// Issue cgo checking code.
-						memThen = wbcall(pos, bThen, cgoCheckPtrWrite, nil, ptr, val, memThen, sp, sb)
-					}
-
-					memThen = bThen.NewValue3A(pos, OpWB, types.TypeMem, gcWriteBarrier, ptr, val, memThen)
-				} else {
-					srcval := val
-					if w.Op == OpMoveWB && isVolatile(srcval) {
-						for _, c := range volatiles {
-							if srcval == c.src {
-								srcval = c.tmp
-								break
-							}
+			case OpMoveWB:
+				dst := w.Args[0]
+				src := w.Args[1]
+				if isVolatile(src) {
+					for _, c := range volatiles {
+						if src == c.src {
+							src = c.tmp
+							break
 						}
 					}
-					memThen = wbcall(pos, bThen, fn, typ, ptr, srcval, memThen, sp, sb)
 				}
-				// Note that we set up a writebarrier function call.
+				typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
+				// wbMove(&typ, dst, src)
+				taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
+				memThen = wbcall(pos, bThen, wbMove, sp, memThen, taddr, dst, src)
 				f.fe.SetWBPos(pos)
-			case OpVarDef, OpVarLive:
-				memThen = bThen.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memThen)
+				nWBops--
 			}
+		}
+		// merge memory
+		mem = bEnd.NewValue2(pos, OpPhi, types.TypeMem, memThen, mem)

-			// else block: normal store
+		// Do raw stores after merge point.
+		for _, w := range stores {
 			switch w.Op {
 			case OpStoreWB:
+				ptr := w.Args[0]
+				val := w.Args[1]
 				if buildcfg.Experiment.CgoCheck2 {
 					// Issue cgo checking code.
-					memElse = wbcall(pos, bElse, cgoCheckPtrWrite, nil, ptr, val, memElse, sp, sb)
+					mem = wbcall(pos, bEnd, cgoCheckPtrWrite, sp, mem, ptr, val)
 				}
-				memElse = bElse.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, memElse)
-			case OpMoveWB:
-				if buildcfg.Experiment.CgoCheck2 {
-					// Issue cgo checking code.
-					memElse = wbcall(pos, bElse, cgoCheckMemmove, reflectdata.TypeLinksym(w.Aux.(*types.Type)), ptr, val, memElse, sp, sb)
-				}
-				memElse = bElse.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, ptr, val, memElse)
-				memElse.Aux = w.Aux
+				mem = bEnd.NewValue3A(pos, OpStore, types.TypeMem, w.Aux, ptr, val, mem)
 			case OpZeroWB:
-				memElse = bElse.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, ptr, memElse)
-				memElse.Aux = w.Aux
+				dst := w.Args[0]
+				mem = bEnd.NewValue2I(pos, OpZero, types.TypeMem, w.AuxInt, dst, mem)
+				mem.Aux = w.Aux
+			case OpMoveWB:
+				dst := w.Args[0]
+				src := w.Args[1]
+				if isVolatile(src) {
+					for _, c := range volatiles {
+						if src == c.src {
+							src = c.tmp
+							break
+						}
+					}
+				}
+				if buildcfg.Experiment.CgoCheck2 {
+					// Issue cgo checking code.
+					typ := reflectdata.TypeLinksym(w.Aux.(*types.Type))
+					taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
+					mem = wbcall(pos, bEnd, cgoCheckMemmove, sp, mem, taddr, dst, src)
+				}
+				mem = bEnd.NewValue3I(pos, OpMove, types.TypeMem, w.AuxInt, dst, src, mem)
+				mem.Aux = w.Aux
 			case OpVarDef, OpVarLive:
-				memElse = bElse.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, memElse)
+				mem = bEnd.NewValue1A(pos, w.Op, types.TypeMem, w.Aux, mem)
 			}
 		}

-		// merge memory
-		mem = bEnd.NewValue2(pos, OpPhi, types.TypeMem, memThen, memElse)
 		// The last store becomes the WBend marker. This marker is used by the liveness
 		// pass to determine what parts of the code are preemption-unsafe.
 		// All subsequent memory operations use this memory, so we have to sacrifice the
@ -535,58 +544,35 @@ func (f *Func) computeZeroMap(select1 []*Value) map[ID]ZeroRegion {
 }

 // wbcall emits write barrier runtime call in b, returns memory.
-func wbcall(pos src.XPos, b *Block, fn, typ *obj.LSym, ptr, val, mem, sp, sb *Value) *Value {
+func wbcall(pos src.XPos, b *Block, fn *obj.LSym, sp, mem *Value, args ...*Value) *Value {
 	config := b.Func.Config
+	typ := config.Types.Uintptr // type of all argument values
+	nargs := len(args)

-	var wbargs []*Value
 	// TODO (register args) this is a bit of a hack.
 	inRegs := b.Func.ABIDefault == b.Func.ABI1 && len(config.intParamRegs) >= 3

-	// put arguments on stack
-	off := config.ctxt.Arch.FixedFrameSize
-
-	var argTypes []*types.Type
-	if typ != nil { // for typedmemmove/cgoCheckMemmove
-		taddr := b.NewValue1A(pos, OpAddr, b.Func.Config.Types.Uintptr, typ, sb)
-		argTypes = append(argTypes, b.Func.Config.Types.Uintptr)
-		off = round(off, taddr.Type.Alignment())
-		if inRegs {
-			wbargs = append(wbargs, taddr)
-		} else {
-			arg := b.NewValue1I(pos, OpOffPtr, taddr.Type.PtrTo(), off, sp)
-			mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, taddr, mem)
+	if !inRegs {
+		// Store arguments to the appropriate stack slot.
+		off := config.ctxt.Arch.FixedFrameSize
+		for _, arg := range args {
+			stkaddr := b.NewValue1I(pos, OpOffPtr, typ.PtrTo(), off, sp)
+			mem = b.NewValue3A(pos, OpStore, types.TypeMem, typ, stkaddr, arg, mem)
+			off += typ.Size()
 		}
-		off += taddr.Type.Size()
+		args = args[:0]
 	}

-	argTypes = append(argTypes, ptr.Type)
-	off = round(off, ptr.Type.Alignment())
-	if inRegs {
-		wbargs = append(wbargs, ptr)
-	} else {
-		arg := b.NewValue1I(pos, OpOffPtr, ptr.Type.PtrTo(), off, sp)
-		mem = b.NewValue3A(pos, OpStore, types.TypeMem, ptr.Type, arg, ptr, mem)
-	}
-	off += ptr.Type.Size()
-
-	if val != nil {
-		argTypes = append(argTypes, val.Type)
-		off = round(off, val.Type.Alignment())
-		if inRegs {
-			wbargs = append(wbargs, val)
-		} else {
-			arg := b.NewValue1I(pos, OpOffPtr, val.Type.PtrTo(), off, sp)
-			mem = b.NewValue3A(pos, OpStore, types.TypeMem, val.Type, arg, val, mem)
-		}
-		off += val.Type.Size()
-	}
-	off = round(off, config.PtrSize)
-	wbargs = append(wbargs, mem)
+	args = append(args, mem)

 	// issue call
+	argTypes := make([]*types.Type, nargs, 3) // at most 3 args; allows stack allocation
+	for i := 0; i < nargs; i++ {
+		argTypes[i] = typ
+	}
 	call := b.NewValue0A(pos, OpStaticCall, types.TypeResultMem, StaticAuxCall(fn, b.Func.ABIDefault.ABIAnalyzeTypes(nil, argTypes, nil)))
-	call.AddArgs(wbargs...)
-	call.AuxInt = off - config.ctxt.Arch.FixedFrameSize
+	call.AddArgs(args...)
+	call.AuxInt = int64(nargs) * typ.Size()
 	return b.NewValue1I(pos, OpSelectN, types.TypeMem, 0, call)
 }