net: fix connection resets when closed on windows

It is common to close a network connection while another goroutine is
blocked reading from it. This sequence corresponds to
windows calls to WSARecv to start io, followed by GetQueuedCompletionStatus
that blocks until io completes, and, finally, closesocket called from
another thread. We were expecting that closesocket would unblock
GetQueuedCompletionStatus, and it does, but not always
(http://code.google.com/p/go/issues/detail?id=4170#c5). Also, that sequence
results in the connection being reset.

This CL inserts CancelIo between GetQueuedCompletionStatus and closesocket,
and waits for both WSARecv and GetQueuedCompletionStatus to complete before
proceeding to closesocket.  This seems to fix both connection resets and
issue 4170. It also makes the windows code behave similarly to the unix version.

Unfortunately, CancelIo needs to be called on the same thread as WSARecv.
So we have to apply the strategy we use for connections with deadlines to
every connection now. This means there are 2 unavoidable thread switches
for every io. Some newer versions of windows have new CancelIoEx api that
doesn't have these drawbacks, and this CL uses this capability when available.
As time goes by, we should have less of CancelIo and more of CancelIoEx
systems. Computers with CancelIoEx are also not affected by issue 4195 anymore.

Fixes #3710
Fixes #3746
Fixes #4170
Partial fix for issue 4195

R=golang-dev, mikioh.mikioh, bradfitz, rsc
CC=golang-dev
https://golang.org/cl/6604072
This commit is contained in:
Alex Brainman 2012-10-31 10:24:37 +11:00
parent ad487dad75
commit fa3e4fc429
10 changed files with 265 additions and 76 deletions

View file

@ -146,3 +146,82 @@ func TestTimeoutAccept(t *testing.T) {
// Pass.
}
}
// TestReadWriteDeadline sets a read deadline and a later write deadline on
// the same client connection, then runs a blocked Read and a Write loop
// concurrently and checks that each one fails within delta of its own
// deadline.
//
// The test is skipped when canCancelIO is false.
func TestReadWriteDeadline(t *testing.T) {
	if !canCancelIO {
		t.Logf("skipping test on this system")
		return
	}
	const (
		readTimeout  = 100 * time.Millisecond
		writeTimeout = 200 * time.Millisecond
		delta        = 40 * time.Millisecond
	)
	// checkTimeout reports an error unless the time elapsed since start is
	// within delta of the expected duration.
	checkTimeout := func(command string, start time.Time, should time.Duration) {
		is := time.Since(start)
		d := should - is
		if d < -delta || delta < d {
			t.Errorf("%s timeout test failed: is=%v should=%v\n", command, is, should)
		}
	}

	ln, err := Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("Listen on 127.0.0.1:0: %v", err)
	}
	defer ln.Close()

	// done is closed when the test function returns. The accept goroutine
	// waits on it so the server side of the connection stays open while the
	// deadlines are being exercised; closing it early would make the client
	// Read fail for the wrong reason.
	done := make(chan struct{})
	defer close(done)
	// acceptErr is buffered so the accept goroutine never blocks on the
	// send, even if the test bails out early. The error is reported from
	// the test goroutine because t.Fatalf must not be called elsewhere.
	acceptErr := make(chan error, 1)
	go func() {
		c, err := ln.Accept()
		acceptErr <- err
		if err != nil {
			return
		}
		defer c.Close()
		<-done
	}()

	c, err := Dial("tcp", ln.Addr().String())
	if err != nil {
		t.Fatalf("Dial: %v", err)
	}
	defer c.Close()

	start := time.Now()
	if err := c.SetReadDeadline(start.Add(readTimeout)); err != nil {
		t.Fatalf("SetReadDeadline: %v", err)
	}
	if err := c.SetWriteDeadline(start.Add(writeTimeout)); err != nil {
		t.Fatalf("SetWriteDeadline: %v", err)
	}

	quit := make(chan bool)
	// Reader: nothing is ever sent by the peer, so Read must fail.
	// Each goroutine uses its own err to avoid racing on a shared variable.
	go func() {
		var buf [10]byte
		if _, err := c.Read(buf[:]); err == nil {
			t.Errorf("Read should not succeed")
		}
		checkTimeout("Read", start, readTimeout)
		quit <- true
	}()
	// Writer: the peer never reads, so keep writing until Write fails.
	go func() {
		var buf [10000]byte
		for {
			if _, err := c.Write(buf[:]); err != nil {
				break
			}
		}
		checkTimeout("Write", start, writeTimeout)
		quit <- true
	}()
	<-quit
	<-quit
	if err := <-acceptErr; err != nil {
		t.Errorf("Accept: %v", err)
	}
}