-
Notifications
You must be signed in to change notification settings - Fork 357
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improvements to EventHub/ServiceBus error handling (#1784, #1760) #1788
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the MIT License. See License.txt in the project root for license information. | ||
|
||
using System.Diagnostics; | ||
using Microsoft.Extensions.Logging; | ||
|
||
namespace Microsoft.Azure.WebJobs.Host | ||
{ | ||
/// <summary>
/// Extension helpers that translate <see cref="LogLevel"/> values into the
/// <see cref="TraceLevel"/> scale.
/// </summary>
internal static class LogLevelExtensions
{
    /// <summary>
    /// Maps a <see cref="LogLevel"/> to the corresponding <see cref="TraceLevel"/>.
    /// </summary>
    /// <param name="logLevel">The log level to convert.</param>
    /// <returns>
    /// <see cref="TraceLevel.Error"/> for Critical and Error,
    /// <see cref="TraceLevel.Warning"/> for Warning,
    /// <see cref="TraceLevel.Info"/> for Information,
    /// <see cref="TraceLevel.Verbose"/> for Debug and Trace,
    /// and <see cref="TraceLevel.Off"/> for any other value.
    /// </returns>
    internal static TraceLevel ToTraceLevel(this LogLevel logLevel)
    {
        switch (logLevel)
        {
            case LogLevel.Error:
            case LogLevel.Critical:
                return TraceLevel.Error;

            case LogLevel.Warning:
                return TraceLevel.Warning;

            case LogLevel.Information:
                return TraceLevel.Info;

            case LogLevel.Debug:
            case LogLevel.Trace:
                return TraceLevel.Verbose;

            default:
                // Anything not listed above has no trace equivalent.
                return TraceLevel.Off;
        }
    }
}
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,23 +51,15 @@ public void Initialize(ExtensionConfigContext context) | |
throw new ArgumentNullException("context"); | ||
} | ||
|
||
// Register an exception handler for background exceptions | ||
// coming from MessageReceivers. | ||
// | ||
// The message options is a host level instance that is shared | ||
// across all bindings, so we have to subscribe to it at the | ||
// host level. | ||
Config.MessageOptions.ExceptionReceived += (s, e) => | ||
{ | ||
Utility.LogExceptionReceivedEvent(e, "MessageReceiver", context.Trace, context.Config.LoggerFactory); | ||
}; | ||
|
||
// get the services we need to construct our binding providers | ||
INameResolver nameResolver = context.Config.GetService<INameResolver>(); | ||
IExtensionRegistry extensions = context.Config.GetService<IExtensionRegistry>(); | ||
|
||
// register the background exception handler | ||
var exceptionHandler = MessagingExceptionHandler.Subscribe(Config.MessageOptions, context.Trace, context.Config.LoggerFactory); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The key to the ServiceBus fix was to capture the exception handler and flow it down to the listener, so that it can be unsubscribed BEFORE we abort the messaging factory. |
||
|
||
// register our trigger binding provider | ||
ServiceBusTriggerAttributeBindingProvider triggerBindingProvider = new ServiceBusTriggerAttributeBindingProvider(nameResolver, _serviceBusConfig); | ||
ServiceBusTriggerAttributeBindingProvider triggerBindingProvider = new ServiceBusTriggerAttributeBindingProvider(nameResolver, _serviceBusConfig, exceptionHandler); | ||
extensions.RegisterExtension<ITriggerBindingProvider>(triggerBindingProvider); | ||
|
||
// register our binding provider | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -444,6 +444,9 @@ void IExtensionConfigProvider.Initialize(ExtensionConfigContext context) | |
.AddConverter<byte[], EventData>(ConvertBytes2EventData) | ||
.AddConverter<EventData, byte[]>(ConvertEventData2Bytes); | ||
|
||
// register the background exception handler | ||
MessagingExceptionHandler.Subscribe(_options, context.Trace, context.Config.LoggerFactory); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We don't need to unsubscribe for EventHubs - the way our listener unregisters the EPH shuts down cleanly. I had flowed the handler down anyway; however, we don't want to unregister it before we close the EPH, since we want to log any errors that happen during graceful shutdown. |
||
|
||
// register our trigger binding provider | ||
INameResolver nameResolver = context.Config.NameResolver; | ||
IConverterManager cm = context.Config.GetService<IConverterManager>(); | ||
|
@@ -454,17 +457,6 @@ void IExtensionConfigProvider.Initialize(ExtensionConfigContext context) | |
// register our binding provider | ||
context.AddBindingRule<EventHubAttribute>() | ||
.BindToCollector(BuildFromAttribute); | ||
|
||
// Register an exception handler for background exceptions | ||
// coming from the EventProcessorHost. | ||
// | ||
// EventProcessorOptions is a host level instance that is shared | ||
// across all bindings, so we have to subscribe to it at the | ||
// host level. | ||
_options.ExceptionReceived += (s, e) => | ||
{ | ||
Utility.LogExceptionReceivedEvent(e, "EventProcessorHost", context.Trace, context.Config.LoggerFactory); | ||
}; | ||
} | ||
|
||
private IAsyncCollector<EventData> BuildFromAttribute(EventHubAttribute attribute) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
// Copyright (c) .NET Foundation. All rights reserved. | ||
// Licensed under the MIT License. See License.txt in the project root for license information. | ||
|
||
using System; | ||
using Microsoft.Azure.WebJobs.Host; | ||
using Microsoft.Azure.WebJobs.Logging; | ||
using Microsoft.Extensions.Logging; | ||
using Microsoft.ServiceBus.Messaging; | ||
|
||
namespace Microsoft.Azure.WebJobs.ServiceBus | ||
{ | ||
/// <summary>
/// Base class for handlers that log background exceptions raised through the
/// <c>ExceptionReceived</c> event of a messaging options object. Concrete
/// handlers exist for <see cref="EventProcessorOptions"/> and
/// <see cref="OnMessageOptions"/>; use the static <c>Subscribe</c> factories
/// to create and attach one.
/// </summary>
internal abstract class MessagingExceptionHandler
{
    private readonly TraceWriter _traceWriter;
    private readonly ILoggerFactory _loggerFactory;
    private readonly string _source;

    /// <summary>
    /// Initializes the shared logging state.
    /// </summary>
    /// <param name="source">Label prefixed to every logged message. Must be non-empty.</param>
    /// <param name="traceWriter">Trace writer that receives every event. Must not be null.</param>
    /// <param name="loggerFactory">Optional logger factory; when null only the trace writer is used.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> is null or empty, or <paramref name="traceWriter"/> is null.
    /// </exception>
    public MessagingExceptionHandler(string source, TraceWriter traceWriter, ILoggerFactory loggerFactory = null)
    {
        if (string.IsNullOrEmpty(source))
        {
            throw new ArgumentNullException(nameof(source));
        }
        if (traceWriter == null)
        {
            throw new ArgumentNullException(nameof(traceWriter));
        }

        _source = source;
        _traceWriter = traceWriter;
        _loggerFactory = loggerFactory;
    }

    /// <summary>
    /// Creates an exception handler for the given <see cref="EventProcessorOptions"/>
    /// and subscribes it to the options' <c>ExceptionReceived</c> event.
    /// </summary>
    /// <returns>The subscribed handler, which can later be unsubscribed.</returns>
    public static MessagingExceptionHandler Subscribe(EventProcessorOptions options, TraceWriter traceWriter, ILoggerFactory loggerFactory = null)
    {
        var exceptionHandler = new EventHubExceptionHandler(options, traceWriter, loggerFactory);
        exceptionHandler.Subscribe();
        return exceptionHandler;
    }

    /// <summary>
    /// Creates an exception handler for the given <see cref="OnMessageOptions"/>
    /// and subscribes it to the options' <c>ExceptionReceived</c> event.
    /// </summary>
    /// <returns>The subscribed handler, which can later be unsubscribed.</returns>
    public static MessagingExceptionHandler Subscribe(OnMessageOptions options, TraceWriter traceWriter, ILoggerFactory loggerFactory = null)
    {
        var exceptionHandler = new ServiceBusExceptionHandler(options, traceWriter, loggerFactory);
        exceptionHandler.Subscribe();
        return exceptionHandler;
    }

    /// <summary>Attaches this handler to the underlying options' <c>ExceptionReceived</c> event.</summary>
    public abstract void Subscribe();

    /// <summary>Detaches this handler from the underlying options' <c>ExceptionReceived</c> event.</summary>
    public abstract void Unsubscribe();

    /// <summary>
    /// Event callback wired to <c>ExceptionReceived</c>; forwards to
    /// <see cref="LogExceptionReceivedEvent"/>.
    /// </summary>
    protected void Handle(object sender, ExceptionReceivedEventArgs e)
    {
        LogExceptionReceivedEvent(e);
    }

    /// <summary>
    /// Logs the exception event to the logger (when a logger factory was supplied)
    /// and to the trace writer. Logging is best effort: this method never throws.
    /// </summary>
    /// <param name="e">The event arguments describing the failed action and its exception.</param>
    internal void LogExceptionReceivedEvent(ExceptionReceivedEventArgs e)
    {
        string message;
        LogLevel logLevel;
        try
        {
            // Let interpolation format the exception rather than calling
            // ToString() on it directly, so an event that carries no exception
            // is still recorded instead of being lost to a NullReferenceException.
            message = $"{_source} error (Action={e.Action}) : {e.Exception}";
            logLevel = GetLogLevel(e.Exception);
        }
        catch
        {
            // best effort logging - nothing usable to report
            return;
        }

        // The two sinks are written independently so a failure in one does
        // not prevent the event from reaching the other.
        try
        {
            var logger = _loggerFactory?.CreateLogger(LogCategories.Executor);
            logger?.Log(logLevel, 0, message, e.Exception, (s, ex) => message);
        }
        catch
        {
            // best effort logging
        }

        try
        {
            var traceEvent = new TraceEvent(logLevel.ToTraceLevel(), message, null, e.Exception);
            _traceWriter.Trace(traceEvent);
        }
        catch
        {
            // best effort logging
        }
    }

    /// <summary>
    /// Chooses the level at which an exception is logged.
    /// </summary>
    /// <param name="ex">The exception reported by the messaging component.</param>
    /// <returns>
    /// <see cref="LogLevel.Information"/> for <see cref="OperationCanceledException"/> and
    /// for a <see cref="MessagingException"/> whose <c>IsTransient</c> is true;
    /// <see cref="LogLevel.Error"/> for everything else.
    /// </returns>
    protected virtual LogLevel GetLogLevel(Exception ex)
    {
        var mex = ex as MessagingException;
        bool isBenign = ex is OperationCanceledException || (mex != null && mex.IsTransient);

        if (isBenign)
        {
            // cancellations and transient messaging errors are logged at
            // Information level so we have a record of them, but we don't
            // treat them as actual errors
            return LogLevel.Information;
        }

        // any non-transient exceptions or unknown exception types
        // we want to log as errors
        return LogLevel.Error;
    }

    /// <summary>
    /// Handler bound to <see cref="EventProcessorOptions"/>; logs with the
    /// source label "EventProcessorHost".
    /// </summary>
    private class EventHubExceptionHandler : MessagingExceptionHandler
    {
        private readonly EventProcessorOptions _options;

        public EventHubExceptionHandler(EventProcessorOptions options, TraceWriter traceWriter, ILoggerFactory loggerFactory = null)
            : base("EventProcessorHost", traceWriter, loggerFactory)
        {
            if (options == null)
            {
                throw new ArgumentNullException(nameof(options));
            }

            _options = options;
        }

        public override void Subscribe()
        {
            _options.ExceptionReceived += Handle;
        }

        public override void Unsubscribe()
        {
            _options.ExceptionReceived -= Handle;
        }

        protected override LogLevel GetLogLevel(Exception ex)
        {
            // Review note: this override is the key change for the EventHub
            // issue fix - it makes these exceptions Info level rather than
            // errors as they were before (generating CRIs).
            if (ex is ReceiverDisconnectedException ||
                ex is LeaseLostException)
            {
                // For EventProcessorHost these exceptions can happen as part
                // of normal partition balancing across instances, so we want to
                // trace them, but not treat them as errors.
                return LogLevel.Information;
            }

            return base.GetLogLevel(ex);
        }
    }

    /// <summary>
    /// Handler bound to <see cref="OnMessageOptions"/>; logs with the
    /// source label "MessageReceiver".
    /// </summary>
    private class ServiceBusExceptionHandler : MessagingExceptionHandler
    {
        private readonly OnMessageOptions _options;

        public ServiceBusExceptionHandler(OnMessageOptions options, TraceWriter traceWriter, ILoggerFactory loggerFactory = null)
            : base("MessageReceiver", traceWriter, loggerFactory)
        {
            if (options == null)
            {
                throw new ArgumentNullException(nameof(options));
            }

            _options = options;
        }

        public override void Subscribe()
        {
            _options.ExceptionReceived += Handle;
        }

        public override void Unsubscribe()
        {
            _options.ExceptionReceived -= Handle;
        }
    }
}
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Copied this from the functions runtime codebase