net/http: avoid connCount underflow race

Remove a race condition in counting the number of connections per host,
which can cause a connCount underflow and a panic.

The race occurs when:

  - A RoundTrip call attempts to use an HTTP/2 roundtripper (pconn.alt != nil)
    and receives an isNoCachedConn error. The call removes the pconn from
    the idle conn pool and decrements the connCount for its host.
  - A second RoundTrip call on the same pconn succeeds,
    and delivers the pconn to a third RoundTrip waiting for a conn.
  - The third RoundTrip receives the pconn at the same moment its request
    context is canceled. It places the pconn back into the idle conn pool.

At this time, the connCount is incorrect, because the conn returned to
the idle pool is not matched by an increment in the connCount.

Fix this by not adding HTTP/2 pconns back to the idle pool in
wantConn.cancel.

Fixes #61474

Change-Id: I104d6cf85a54d0382eebf3fcf5dda99c69a7c3f6
Reviewed-on: https://go-review.googlesource.com/c/go/+/703936
Auto-Submit: Damien Neil <dneil@google.com>
Reviewed-by: Nicholas Husin <husin@google.com>
Reviewed-by: Nicholas Husin <nsh@golang.org>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
This commit is contained in:
Damien Neil 2025-09-15 15:18:57 -07:00 committed by Gopher Robot
parent 8ca209ec39
commit 3203a5da29
2 changed files with 36 additions and 1 deletions

View file

@ -1382,7 +1382,10 @@ func (w *wantConn) cancel(t *Transport) {
w.done = true
w.mu.Unlock()
if pc != nil {
// HTTP/2 connections (pc.alt != nil) aren't removed from the idle pool on use,
// and should not be added back here. If the pconn isn't in the idle pool,
// it's because we removed it due to an error.
if pc != nil && pc.alt == nil {
t.putOrCloseIdleConn(pc)
}
}

View file

@ -7625,3 +7625,35 @@ func TestTransportServerProtocols(t *testing.T) {
})
}
}
func TestIssue61474(t *testing.T) {
run(t, testIssue61474, []testMode{http2Mode})
}
// testIssue61474 hammers a MaxConnsPerHost=1 transport with many
// short-deadline requests to provoke the connCount underflow described
// in #61474 (a canceled wantConn returning an HTTP/2 pconn to the idle
// pool without a matching connCount increment).
func testIssue61474(t *testing.T, mode testMode) {
	if testing.Short() {
		return
	}
	// This test reliably exercises the condition causing #61474,
	// but requires many iterations to do so.
	// Keep the test around for now, but don't run it by default.
	t.Skip("test is too large")
	cst := newClientServerTest(t, mode, HandlerFunc(func(rw ResponseWriter, req *Request) {
	}), func(tr *Transport) {
		// A single conn per host maximizes contention on the
		// idle pool and the per-host connCount.
		tr.MaxConnsPerHost = 1
	})
	var group sync.WaitGroup
	defer group.Wait()
	for i := 0; i < 100000; i++ {
		group.Add(1)
		go func() {
			defer group.Done()
			// A tiny deadline makes cancellation race with
			// connection delivery.
			ctx, cancel := context.WithTimeout(t.Context(), 1*time.Millisecond)
			defer cancel()
			r, _ := NewRequestWithContext(ctx, "GET", cst.ts.URL, nil)
			res, err := cst.c.Do(r)
			if err == nil {
				res.Body.Close()
			}
		}()
	}
}