runtime: fix CPU underutilization

runtime.newproc/ready are deliberately sloppy about waking new M's: they only ensure that there is at least one spinning M. To compensate, schedule() currently checks whether the current P has local work while no M's are spinning, and if so wakes up another M. That compensation does not work if goroutines never call schedule. With this change, a spinning M wakes up another M when it finds work to do. This is still not ideal, but it fixes the underutilization. A proper check would require knowing the exact number of runnable G's, which is too expensive to maintain.

Fixes #5586.

R=rsc
CC=gobot, golang-dev
https://golang.org/cl/9776044
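For context, the underutilization is easiest to see with goroutines that spin without ever blocking. The sketch below is a standalone adaptation of the TestGoroutineParallelism test added by this change; the program itself is not part of the commit. P goroutines take turns advancing an atomic counter by busy-waiting, so the run only finishes if all P of them are actually scheduled in parallel; with the old wakeup policy some of them could be left stranded on idle P's. It assumes at least P available CPUs, since the busy-wait loops never yield.

package main

import (
	"runtime"
	"sync/atomic"
)

func main() {
	const P = 4
	runtime.GOMAXPROCS(P)
	done := make(chan bool)
	var x uint32
	for p := 0; p < P; p++ {
		go func(p int) {
			// Take our turn three times: busy-wait (without blocking) until
			// the counter reaches our slot, then advance it for the next goroutine.
			for i := 0; i < 3; i++ {
				expected := uint32(P*i + p)
				for atomic.LoadUint32(&x) != expected {
				}
				atomic.StoreUint32(&x, expected+1)
			}
			done <- true
		}(p)
	}
	for p := 0; p < P; p++ {
		<-done
	}
}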
This commit is contained in:
parent eac6bee7c1
commit 15a1c3d1e4

2 changed files with 51 additions and 14 deletions
@@ -1018,7 +1018,7 @@ execute(G *gp)
 // Finds a runnable goroutine to execute.
 // Tries to steal from other P's, get g from global queue, poll network.
 static G*
-findrunnable(void)
+findrunnable1(void)
 {
 	G *gp;
 	P *p;
@@ -1127,6 +1127,29 @@ stop:
 	goto top;
 }
 
+static G*
+findrunnable(void)
+{
+	G *gp;
+	int32 nmspinning;
+
+	gp = findrunnable1();  // blocks until work is available
+	if(m->spinning) {
+		m->spinning = false;
+		nmspinning = runtime·xadd(&runtime·sched.nmspinning, -1);
+		if(nmspinning < 0)
+			runtime·throw("findrunnable: negative nmspinning");
+	} else
+		nmspinning = runtime·atomicload(&runtime·sched.nmspinning);
+
+	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
+	// so see if we need to wakeup another P here.
+	if (nmspinning == 0 && runtime·atomicload(&runtime·sched.npidle) > 0)
+		wakep();
+
+	return gp;
+}
+
 // Injects the list of runnable G's into the scheduler.
 // Can run concurrently with GC.
 static void
@@ -1185,21 +1208,11 @@ top:
 			runtime·throw("schedule: spinning with local work");
 	}
 	if(gp == nil)
-		gp = findrunnable();
+		gp = findrunnable();  // blocks until work is available
 
-	if(m->spinning) {
-		m->spinning = false;
-		runtime·xadd(&runtime·sched.nmspinning, -1);
-	}
-
-	// M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
-	// so see if we need to wakeup another M here.
-	if (m->p->runqhead != m->p->runqtail &&
-		runtime·atomicload(&runtime·sched.nmspinning) == 0 &&
-		runtime·atomicload(&runtime·sched.npidle) > 0)  // TODO: fast atomic
-		wakep();
-
 	if(gp->lockedm) {
+		// Hands off own p to the locked m,
+		// then blocks waiting for a new p.
 		startlockedm(gp);
 		goto top;
 	}
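To make the new policy easier to follow, here is a small, self-contained Go sketch of the decision the added findrunnable wrapper makes once work has been found. This is a toy model, not the runtime's code: foundWork is a hypothetical helper, and nmspinning and npidle are plain variables standing in for the scheduler's counters.

package main

import (
	"fmt"
	"sync/atomic"
)

var (
	nmspinning int32 // toy stand-in: workers currently looking for work
	npidle     int32 // toy stand-in: idle processors with no worker attached
)

// foundWork models the check above: the worker that just obtained work stops
// counting itself as spinning, and if that leaves no spinning workers while
// idle processors remain, it reports that one more worker should be woken so
// other runnable work is not left waiting on an idle processor.
func foundWork(wasSpinning bool) (wakeAnother bool) {
	var spinning int32
	if wasSpinning {
		spinning = atomic.AddInt32(&nmspinning, -1)
	} else {
		spinning = atomic.LoadInt32(&nmspinning)
	}
	return spinning == 0 && atomic.LoadInt32(&npidle) > 0
}

func main() {
	nmspinning, npidle = 1, 2
	fmt.Println(foundWork(true)) // true: the last spinner took work and idle processors remain
}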
@@ -93,6 +93,30 @@ func TestYieldLocked(t *testing.T) {
 	<-c
 }
 
+func TestGoroutineParallelism(t *testing.T) {
+	const P = 4
+	defer runtime.GOMAXPROCS(runtime.GOMAXPROCS(P))
+	for try := 0; try < 10; try++ {
+		done := make(chan bool)
+		x := uint32(0)
+		for p := 0; p < P; p++ {
+			// Test that all P goroutines are scheduled at the same time
+			go func(p int) {
+				for i := 0; i < 3; i++ {
+					expected := uint32(P*i + p)
+					for atomic.LoadUint32(&x) != expected {
+					}
+					atomic.StoreUint32(&x, expected+1)
+				}
+				done <- true
+			}(p)
+		}
+		for p := 0; p < P; p++ {
+			<-done
+		}
+	}
+}
+
 func TestBlockLocked(t *testing.T) {
 	const N = 10
 	c := make(chan bool)