-
Notifications
You must be signed in to change notification settings - Fork 4.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MLA-1767] Refactor communicator connection exceptions #4935
Changes from 4 commits
d5cb74c
31dbd85
6c50615
d39d03c
f807b86
d80234f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,8 +64,8 @@ public RpcCommunicator(CommunicatorInitParameters communicatorInitParameters) | |
|
||
internal static bool CheckCommunicationVersionsAreCompatible( | ||
string unityCommunicationVersion, | ||
string pythonApiVersion, | ||
string pythonLibraryVersion) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed this since it was unused. |
||
string pythonApiVersion | ||
) | ||
{ | ||
var unityVersion = new Version(unityCommunicationVersion); | ||
var pythonVersion = new Version(pythonApiVersion); | ||
|
@@ -92,9 +92,10 @@ internal static bool CheckCommunicationVersionsAreCompatible( | |
/// Sends the initialization parameters through the Communicator. | ||
/// Is used by the academy to send initialization parameters to the communicator. | ||
/// </summary> | ||
/// <returns>The External Initialization Parameters received.</returns> | ||
/// <returns>Whether the connection was successful.</returns> | ||
/// <param name="initParameters">The Unity Initialization Parameters to be sent.</param> | ||
public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameters) | ||
/// <param name="initParametersOut">The External Initialization Parameters received.</param> | ||
public bool Initialize(CommunicatorInitParameters initParameters, out UnityRLInitParameters initParametersOut) | ||
{ | ||
var academyParameters = new UnityRLInitializationOutputProto | ||
{ | ||
|
@@ -113,62 +114,75 @@ public UnityRLInitParameters Initialize(CommunicatorInitParameters initParameter | |
{ | ||
RlInitializationOutput = academyParameters | ||
}, | ||
out input); | ||
|
||
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved this logic outside of the |
||
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion; | ||
var unityCommunicationVersion = initParameters.unityCommunicationVersion; | ||
|
||
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion); | ||
|
||
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible(unityCommunicationVersion, | ||
pythonCommunicationVersion, | ||
pythonPackageVersion); | ||
|
||
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator | ||
// API strings, so log an explicit warning if that's the case. | ||
if (initializationInput != null && input == null) | ||
out input | ||
); | ||
} | ||
catch (Exception ex) | ||
surfnerd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
if (ex is RpcException) | ||
{ | ||
if (!communicationIsCompatible) | ||
var rpcException = ex as RpcException; | ||
switch (rpcException.Status.StatusCode) | ||
{ | ||
Debug.LogWarningFormat( | ||
"Communication protocol between python ({0}) and Unity ({1}) have different " + | ||
"versions which make them incompatible. Python library version: {2}.", | ||
pythonCommunicationVersion, initParameters.unityCommunicationVersion, | ||
pythonPackageVersion | ||
); | ||
case StatusCode.Unavailable: | ||
// This is the common case where there's no trainer to connect to. | ||
break; | ||
case StatusCode.DeadlineExceeded: | ||
// We don't currently set a deadline for connection, but likely will in the future. | ||
break; | ||
default: | ||
Debug.Log($"Unexpected gRPC exception when trying to initialize communication: {rpcException}"); | ||
break; | ||
} | ||
else | ||
{ | ||
Debug.LogWarningFormat( | ||
"Unknown communication error between Python. Python communication protocol: {0}, " + | ||
"Python library version: {1}.", | ||
pythonCommunicationVersion, | ||
pythonPackageVersion | ||
); | ||
} | ||
|
||
throw new UnityAgentsException("ICommunicator.Initialize() failed."); | ||
} | ||
else | ||
{ | ||
Debug.Log($"Unexpected exception when trying to initialize communication: {ex}"); | ||
} | ||
initParametersOut = new UnityRLInitParameters(); | ||
return false; | ||
} | ||
catch | ||
{ | ||
var exceptionMessage = "The Communicator was unable to connect. Please make sure the External " + | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Removed this messaging - I'm not sure it was ever useful, since the exception was swallowed by the calling code. |
||
"process is ready to accept communication with Unity."; | ||
|
||
// Check for common error condition and add details to the exception message. | ||
var httpProxy = Environment.GetEnvironmentVariable("HTTP_PROXY"); | ||
var httpsProxy = Environment.GetEnvironmentVariable("HTTPS_PROXY"); | ||
if (httpProxy != null || httpsProxy != null) | ||
var pythonPackageVersion = initializationInput.RlInitializationInput.PackageVersion; | ||
var pythonCommunicationVersion = initializationInput.RlInitializationInput.CommunicationVersion; | ||
|
||
TrainingAnalytics.SetTrainerInformation(pythonPackageVersion, pythonCommunicationVersion); | ||
|
||
var communicationIsCompatible = CheckCommunicationVersionsAreCompatible( | ||
initParameters.unityCommunicationVersion, | ||
pythonCommunicationVersion | ||
); | ||
|
||
// Initialization succeeded part-way. The most likely cause is a mismatch between the communicator | ||
// API strings, so log an explicit warning if that's the case. | ||
if (initializationInput != null && input == null) | ||
{ | ||
if (!communicationIsCompatible) | ||
{ | ||
exceptionMessage += " Try removing HTTP_PROXY and HTTPS_PROXY from the" + | ||
"environment variables and try again."; | ||
Debug.LogWarningFormat( | ||
"Communication protocol between python ({0}) and Unity ({1}) have different " + | ||
"versions which make them incompatible. Python library version: {2}.", | ||
pythonCommunicationVersion, initParameters.unityCommunicationVersion, | ||
pythonPackageVersion | ||
); | ||
} | ||
throw new UnityAgentsException(exceptionMessage); | ||
else | ||
{ | ||
Debug.LogWarningFormat( | ||
"Unknown communication error between Python. Python communication protocol: {0}, " + | ||
"Python library version: {1}.", | ||
pythonCommunicationVersion, | ||
pythonPackageVersion | ||
); | ||
} | ||
|
||
initParametersOut = new UnityRLInitParameters(); | ||
return false; | ||
} | ||
|
||
UpdateEnvironmentWithInput(input.RlInput); | ||
return initializationInput.RlInitializationInput.ToUnityRLInitParameters(); | ||
initParametersOut = initializationInput.RlInitializationInput.ToUnityRLInitParameters(); | ||
return true; | ||
} | ||
|
||
/// <summary> | ||
|
@@ -197,8 +211,7 @@ void UpdateEnvironmentWithInput(UnityRLInputProto rlInput) | |
SendCommandEvent(rlInput.Command); | ||
} | ||
|
||
UnityInputProto Initialize(UnityOutputProto unityOutput, | ||
out UnityInputProto unityInput) | ||
UnityInputProto Initialize(UnityOutputProto unityOutput, out UnityInputProto unityInput) | ||
{ | ||
#if UNITY_EDITOR || UNITY_STANDALONE_WIN || UNITY_STANDALONE_OSX || UNITY_STANDALONE_LINUX | ||
m_IsOpen = true; | ||
|
@@ -220,8 +233,7 @@ UnityInputProto Initialize(UnityOutputProto unityOutput, | |
} | ||
return result.UnityInput; | ||
#else | ||
throw new UnityAgentsException( | ||
"You cannot perform training on this platform."); | ||
throw new UnityAgentsException("You cannot perform training on this platform."); | ||
#endif | ||
} | ||
|
||
|
@@ -456,33 +468,35 @@ UnityInputProto Exchange(UnityOutputProto unityOutput) | |
QuitCommandReceived?.Invoke(); | ||
return message.UnityInput; | ||
} | ||
catch (RpcException rpcException) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Reorganized this to avoid duplicating some of the logic (e.g. |
||
catch (Exception ex) | ||
surfnerd marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
// Log more verbose errors if they're something the user can possibly do something about. | ||
switch (rpcException.Status.StatusCode) | ||
if (ex is RpcException) | ||
{ | ||
case StatusCode.Unavailable: | ||
// This can happen when python disconnects. Ignore it to avoid noisy logs. | ||
break; | ||
case StatusCode.ResourceExhausted: | ||
// This happens is the message body is too large. There's no way to | ||
// gracefully handle this, but at least we can show the message and the | ||
// user can try to reduce the number of agents or observation sizes. | ||
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer."); | ||
break; | ||
default: | ||
// Other unknown errors. Log at INFO level. | ||
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer."); | ||
break; | ||
var rpcException = ex as RpcException; | ||
// Log more verbose errors if they're something the user can possibly do something about. | ||
switch (rpcException.Status.StatusCode) | ||
{ | ||
case StatusCode.Unavailable: | ||
// This can happen when python disconnects. Ignore it to avoid noisy logs. | ||
break; | ||
case StatusCode.ResourceExhausted: | ||
// This happens is the message body is too large. There's no way to | ||
// gracefully handle this, but at least we can show the message and the | ||
// user can try to reduce the number of agents or observation sizes. | ||
Debug.LogError($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer."); | ||
break; | ||
default: | ||
// Other unknown errors. Log at INFO level. | ||
Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer."); | ||
break; | ||
} | ||
} | ||
m_IsOpen = false; | ||
QuitCommandReceived?.Invoke(); | ||
return null; | ||
} | ||
catch (Exception ex) | ||
{ | ||
// Fall-through for other error types | ||
Debug.LogError($"GRPC Exception: {ex.Message}. Disconnecting from trainer."); | ||
else | ||
{ | ||
// Fall-through for other error types | ||
Debug.LogError($"Communication Exception: {ex.Message}. Disconnecting from trainer."); | ||
} | ||
|
||
m_IsOpen = false; | ||
QuitCommandReceived?.Invoke(); | ||
return null; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
using System.Collections.Generic; | ||
using System; | ||
using UnityEngine; | ||
|
||
namespace Unity.MLAgents.SideChannels | ||
{ | ||
|
@@ -34,9 +35,18 @@ public Guid ChannelId | |
|
||
internal void ProcessMessage(byte[] msg) | ||
{ | ||
using (var incomingMsg = new IncomingMessage(msg)) | ||
try | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This threw me off when I was working on the Training Analytics side channel. If the SideChannel throws here, we disconnect the trainer. In general, I think we should wrap user code in bubble wrap. |
||
{ | ||
OnMessageReceived(incomingMsg); | ||
using (var incomingMsg = new IncomingMessage(msg)) | ||
{ | ||
OnMessageReceived(incomingMsg); | ||
} | ||
} | ||
catch (Exception ex) | ||
{ | ||
// Catch all errors in the sidechannel processing, so that a single | ||
// bad SideChannel implementation doesn't take everything down with it. | ||
Debug.LogError($"Error processing SideChannel message: {ex}.\nThe message will be skipped."); | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not an else after the first
if (initSuccessful) {}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's meant to handle both
Communicator.Initialize
returning false and throwing. But I think you're right, it's cleaner to handle each separately.