database/sql: fix race when canceling queries immediately

Previously the following could happen, though in practice it would
be rare.

Goroutine 1:
	(*Tx).QueryContext begins a query, passing in userContext

Goroutine 2:
	(*Tx).awaitDone starts to wait on the context derived from the passed in context

Goroutine 1:
	(*Tx).grabConn returns a valid (*driverConn)
	The (*driverConn) passes to (*DB).queryConn

Goroutine 3:
	userContext is canceled

Goroutine 2:
	(*Tx).awaitDone unblocks and calls (*Tx).rollback
	(*driverConn).finalClose obtains dc.Mutex
	(*driverConn).finalClose sets dc.ci = nil

Goroutine 1:
	(*DB).queryConn obtains dc.Mutex in withLock
	ctxDriverPrepare accepts dc.ci which is now nil
	ctxDriverPrepare panics on the nil ci

The fix for this is to guard the Tx methods with an RWLock,
holding it exclusively when closing the Tx and holding a read lock
when executing a query.

Fixes #18719

Change-Id: I37aa02c37083c9793dabd28f7f934a1c5cbc05ea
Reviewed-on: https://go-review.googlesource.com/35550
Run-TryBot: Brad Fitzpatrick <bradfitz@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Brad Fitzpatrick <bradfitz@golang.org>
This commit is contained in:
Daniel Theophanes 2017-01-20 17:12:50 -08:00 committed by Brad Fitzpatrick
parent 1cf08182f9
commit 2b283cedef
2 changed files with 147 additions and 35 deletions

View file

@ -14,6 +14,7 @@ import (
"runtime"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
)
@ -1135,6 +1136,24 @@ func TestQueryRowClosingStmt(t *testing.T) {
}
}
// atomicRowsCloseHook holds the hook stored by setRowsCloseHook; the
// rowsCloseHook accessor assigned in init loads its value from here.
var atomicRowsCloseHook atomic.Value // of func(*Rows, *error)
// init points rowsCloseHook at an accessor that returns whatever hook is
// currently stored in atomicRowsCloseHook, or nil when nothing of the
// expected func type has been stored.
func init() {
	rowsCloseHook = func() func(*Rows, *error) {
		if hook, ok := atomicRowsCloseHook.Load().(func(*Rows, *error)); ok {
			return hook
		}
		return nil
	}
}
// setRowsCloseHook stores fn in atomicRowsCloseHook. Passing nil stores a
// no-op func in its place.
func setRowsCloseHook(fn func(*Rows, *error)) {
	if fn != nil {
		atomicRowsCloseHook.Store(fn)
		return
	}
	// An atomic.Value can't be changed back to nil, so clearing the hook
	// means storing a func that does nothing.
	atomicRowsCloseHook.Store(func(*Rows, *error) {})
}
// Test issue 6651
func TestIssue6651(t *testing.T) {
db := newTestDB(t, "people")
@ -1147,6 +1166,7 @@ func TestIssue6651(t *testing.T) {
return fmt.Errorf(want)
}
defer func() { rowsCursorNextHook = nil }()
err := db.QueryRow("SELECT|people|name|").Scan(&v)
if err == nil || err.Error() != want {
t.Errorf("error = %q; want %q", err, want)
@ -1154,10 +1174,10 @@ func TestIssue6651(t *testing.T) {
rowsCursorNextHook = nil
want = "error in rows.Close"
rowsCloseHook = func(rows *Rows, err *error) {
setRowsCloseHook(func(rows *Rows, err *error) {
*err = fmt.Errorf(want)
}
defer func() { rowsCloseHook = nil }()
})
defer setRowsCloseHook(nil)
err = db.QueryRow("SELECT|people|name|").Scan(&v)
if err == nil || err.Error() != want {
t.Errorf("error = %q; want %q", err, want)
@ -1830,7 +1850,9 @@ func TestStmtCloseDeps(t *testing.T) {
db.dumpDeps(t)
}
if len(stmt.css) > nquery {
if !waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
return len(stmt.css) <= nquery
}) {
t.Errorf("len(stmt.css) = %d; want <= %d", len(stmt.css), nquery)
}
@ -2576,10 +2598,10 @@ func TestIssue6081(t *testing.T) {
if err != nil {
t.Fatal(err)
}
rowsCloseHook = func(rows *Rows, err *error) {
setRowsCloseHook(func(rows *Rows, err *error) {
*err = driver.ErrBadConn
}
defer func() { rowsCloseHook = nil }()
})
defer setRowsCloseHook(nil)
for i := 0; i < 10; i++ {
rows, err := stmt.Query()
if err != nil {
@ -2642,7 +2664,10 @@ func TestIssue18429(t *testing.T) {
if err != nil {
return
}
rows, err := tx.QueryContext(ctx, "WAIT|"+qwait+"|SELECT|people|name|")
// This is expected to give a cancel error often, but not every time.
// A test failure shows up as a panic or as a race condition being
// reported.
rows, _ := tx.QueryContext(ctx, "WAIT|"+qwait+"|SELECT|people|name|")
if rows != nil {
rows.Close()
}
@ -2655,6 +2680,56 @@ func TestIssue18429(t *testing.T) {
time.Sleep(milliWait * 3 * time.Millisecond)
}
// TestIssue18719 cancels the context right before the connection is used.
// The sql.driverConn nils out its ci on close while holding a lock, so if
// another goroutine uses the connection right afterwards it will panic on
// the nil ci.
//
// See https://golang.org/cl/35550 .
func TestIssue18719(t *testing.T) {
	db := newTestDB(t, "people")
	defer closeDB(t, db)

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		t.Fatal(err)
	}

	hookTxGrabConn = func() {
		cancel()

		// Wait for the context to cancel and tx to rollback.
		for !tx.isDone() {
			time.Sleep(3 * time.Millisecond)
		}
	}
	defer func() { hookTxGrabConn = nil }()

	// This call will grab the connection and cancel the context
	// after it has done so. Code after must deal with the canceled state.
	//
	// The returned Rows is deliberately discarded: it will be closed when
	// the context is canceled. It must not be closed in the error branch
	// either, because it is expected to be nil whenever err is non-nil and
	// calling Close on it would panic instead of reporting err.
	_, err = tx.QueryContext(ctx, "SELECT|people|name|")
	if err != nil {
		t.Fatalf("expected error %v but got %v", nil, err)
	}

	// Do not explicitly rollback. The rollback will happen from the
	// canceled context.

	// Wait for connections to return to pool.
	var numOpen int
	if !waitCondition(5*time.Second, 5*time.Millisecond, func() bool {
		numOpen = db.numOpenConns()
		return numOpen == 0
	}) {
		t.Fatalf("open conns after hitting EOF = %d; want 0", numOpen)
	}
}
func TestConcurrency(t *testing.T) {
doConcurrentTest(t, new(concurrentDBQueryTest))
doConcurrentTest(t, new(concurrentDBExecTest))