-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix RHEL7 socket dispose hang, and extend coverage #43409
Changes from all commits
9f2c092
37176da
b7b8d2c
ee3dd19
48a69c4
512edf5
80501f9
4d83e73
ba5df5e
b301bb0
6158460
b77d31f
1da5006
5b9b879
102561a
f591786
6909cc8
8a3ce60
800d910
b371ae7
720c522
412a1df
86dca50
5b109e3
9842c81
1769820
7475c15
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3085,6 +3085,11 @@ int32_t SystemNative_Disconnect(intptr_t socket) | |
addr.sa_family = AF_UNSPEC; | ||
|
||
err = connect(fd, &addr, sizeof(addr)); | ||
if (err != 0) | ||
{ | ||
// On some older kernels connect(AF_UNSPEC) may fail. Fall back to shutdown in these cases: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
err = shutdown(fd, SHUT_RDWR); | ||
} | ||
#elif HAVE_DISCONNECTX | ||
// disconnectx causes a FIN close on OSX. It's the best we can do. | ||
err = disconnectx(fd, SAE_ASSOCID_ANY, SAE_CONNID_ANY); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,8 +7,10 @@ | |
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Microsoft.DotNet.RemoteExecutor; | ||
using Microsoft.DotNet.XUnitExtensions; | ||
using Xunit; | ||
using Xunit.Abstractions; | ||
using Xunit.Sdk; | ||
|
||
namespace System.Net.Sockets.Tests | ||
{ | ||
|
@@ -937,17 +939,26 @@ error is SocketException || | |
} | ||
} | ||
|
||
[Fact] | ||
public static readonly TheoryData<IPAddress> UdpReceiveGetsCanceledByDispose_Data = new TheoryData<IPAddress> | ||
{ | ||
{ IPAddress.Loopback }, | ||
{ IPAddress.IPv6Loopback }, | ||
{ IPAddress.Loopback.MapToIPv6() } | ||
}; | ||
|
||
[Theory] | ||
[MemberData(nameof(UdpReceiveGetsCanceledByDispose_Data))] | ||
[PlatformSpecific(~TestPlatforms.OSX)] // Not supported on OSX. | ||
public async Task UdpReceiveGetsCanceledByDispose() | ||
public async Task UdpReceiveGetsCanceledByDispose(IPAddress address) | ||
{ | ||
// We try this a couple of times to deal with a timing race: if the Dispose happens | ||
// before the operation is started, we won't see a SocketException. | ||
int msDelay = 100; | ||
await RetryHelper.ExecuteAsync(async () => | ||
{ | ||
var socket = new Socket(AddressFamily.InterNetwork, SocketType.Dgram, ProtocolType.Udp); | ||
socket.BindToAnonymousPort(IPAddress.Loopback); | ||
var socket = new Socket(address.AddressFamily, SocketType.Dgram, ProtocolType.Udp); | ||
if (address.IsIPv4MappedToIPv6) socket.DualMode = true; | ||
socket.BindToAnonymousPort(address); | ||
|
||
Task receiveTask = ReceiveAsync(socket, new ArraySegment<byte>(new byte[1])); | ||
|
||
|
@@ -956,11 +967,7 @@ await RetryHelper.ExecuteAsync(async () => | |
msDelay *= 2; | ||
Task disposeTask = Task.Run(() => socket.Dispose()); | ||
|
||
var cts = new CancellationTokenSource(); | ||
Task timeoutTask = Task.Delay(30000, cts.Token); | ||
Assert.NotSame(timeoutTask, await Task.WhenAny(disposeTask, receiveTask, timeoutTask)); | ||
cts.Cancel(); | ||
|
||
await Task.WhenAny(disposeTask, receiveTask).TimeoutAfter(30000); | ||
await disposeTask; | ||
|
||
SocketError? localSocketError = null; | ||
|
@@ -991,21 +998,35 @@ await RetryHelper.ExecuteAsync(async () => | |
{ | ||
Assert.Equal(SocketError.OperationAborted, localSocketError); | ||
} | ||
}, maxAttempts: 10); | ||
}, maxAttempts: 10, retryWhen: e => e is XunitException); | ||
} | ||
|
||
[Theory] | ||
[InlineData(true)] | ||
[InlineData(false)] | ||
public async Task TcpReceiveSendGetsCanceledByDispose(bool receiveOrSend) | ||
public static readonly TheoryData<bool, bool, bool> TcpReceiveSendGetsCanceledByDispose_Data = new TheoryData<bool, bool, bool> | ||
{ | ||
{ true, false, false }, | ||
{ true, false, true }, | ||
{ true, true, false }, | ||
{ false, false, false }, | ||
{ false, false, true }, | ||
{ false, true, false }, | ||
}; | ||
|
||
[Theory(Timeout = 40000)] | ||
[MemberData(nameof(TcpReceiveSendGetsCanceledByDispose_Data))] | ||
public async Task TcpReceiveSendGetsCanceledByDispose(bool receiveOrSend, bool ipv6Server, bool dualModeClient) | ||
{ | ||
// RHEL7 kernel has a bug preventing close(AF_UNKNOWN) to succeed with IPv6 sockets. | ||
// In this case Dispose will trigger a graceful shutdown, which means that receive will succeed on socket2. | ||
// TODO: Remove this, once CI machines are updated to a newer kernel. | ||
bool expectGracefulShutdown = UsesSync && PlatformDetection.IsRedHatFamily7 && receiveOrSend && (ipv6Server || dualModeClient); | ||
|
||
// We try this a couple of times to deal with a timing race: if the Dispose happens | ||
// before the operation is started, the peer won't see a ConnectionReset SocketException and we won't | ||
// see a SocketException either. | ||
int msDelay = 100; | ||
await RetryHelper.ExecuteAsync(async () => | ||
{ | ||
(Socket socket1, Socket socket2) = SocketTestExtensions.CreateConnectedSocketPair(); | ||
(Socket socket1, Socket socket2) = SocketTestExtensions.CreateConnectedSocketPair(ipv6Server, dualModeClient); | ||
using (socket2) | ||
{ | ||
Task socketOperation; | ||
|
@@ -1030,11 +1051,7 @@ await RetryHelper.ExecuteAsync(async () => | |
msDelay *= 2; | ||
Task disposeTask = Task.Run(() => socket1.Dispose()); | ||
|
||
var cts = new CancellationTokenSource(); | ||
Task timeoutTask = Task.Delay(30000, cts.Token); | ||
Assert.NotSame(timeoutTask, await Task.WhenAny(disposeTask, socketOperation, timeoutTask)); | ||
cts.Cancel(); | ||
|
||
await Task.WhenAny(disposeTask, socketOperation).TimeoutAfter(30000); | ||
await disposeTask; | ||
|
||
SocketError? localSocketError = null; | ||
|
@@ -1088,10 +1105,18 @@ await RetryHelper.ExecuteAsync(async () => | |
break; | ||
} | ||
} | ||
Assert.Equal(SocketError.ConnectionReset, peerSocketError); | ||
|
||
if (!expectGracefulShutdown) | ||
{ | ||
Assert.Equal(SocketError.ConnectionReset, peerSocketError); | ||
} | ||
else | ||
{ | ||
Assert.Null(peerSocketError); | ||
} | ||
Comment on lines
+1109
to
+1116
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tmds the test failure I referred to turned out to be quite simple to fix. |
||
} | ||
} | ||
}, maxAttempts: 10); | ||
}, maxAttempts: 10, retryWhen: e => e is XunitException); | ||
} | ||
|
||
[Fact] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: these are not
client
andserver
, but two connected TCP peers. Theserver
Socket doesn't leave the method, and it looks like we are not Disposing it, which we probably should.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The terminology reflects their roles during the establishment of the connection. I understand that this information is not needed afterwards, but I still find the labels useful for convenience, because it helps me mentally map the tests to a common real-world scenarios. For example
dualModeClient = false
refers to the firstSocket client
. and it sounds more natural thanbool dualModeSocket1
, which would be the case if we'd rename the tuple. We use this terminology in other tests, although it's not necessary or meaningful to label anything as client/server. This is not a very strong opinion although.You mean the
listner
I guess. I also don't like this, but this problem is not specific to the PR. We should fix it on all call sites which I would do separately.