From 6dc67b97286dff71ebc1554317fe97e55ebb15da Mon Sep 17 00:00:00 2001 From: David Justo Date: Tue, 9 Jul 2024 11:42:24 -0700 Subject: [PATCH] Minimize telemetry surface area (#2844) --- .../DurableEntityContext.cs | 16 +- .../DurableOrchestrationContext.cs | 8 +- .../RemoteOrchestratorContext.cs | 15 +- .../DurableTaskExtension.cs | 38 +--- .../EndToEndTraceHelper.cs | 198 +++++++++++++----- .../EtwEventSource.cs | 31 ++- .../Listener/TaskActivityShim.cs | 13 +- .../Listener/TaskEntityShim.cs | 18 +- .../Listener/TaskOrchestrationShim.cs | 18 +- .../Listener/WrappedFunctionResult.cs | 5 +- .../OutOfProcMiddleware.cs | 24 +-- test/Common/TestHelpers.cs | 4 +- test/FunctionsV2/EndToEndTraceHelperTests.cs | 108 ++++++++++ 13 files changed, 341 insertions(+), 155 deletions(-) create mode 100644 test/FunctionsV2/EndToEndTraceHelperTests.cs diff --git a/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableEntityContext.cs b/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableEntityContext.cs index 0b658eca5..228034dd3 100644 --- a/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableEntityContext.cs +++ b/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableEntityContext.cs @@ -122,13 +122,12 @@ bool IDurableEntityContext.HasState public void CaptureInternalError(Exception e, TaskEntityShim shim) { // first, try to get a quick ETW message out to help us diagnose what happened - string details = Utils.IsFatal(e) ? e.GetType().Name : e.ToString(); this.Config.TraceHelper.EntityBatchFailed( this.HubName, this.Name, this.InstanceId, shim.TraceFlags, - details); + e); // then, record the error for additional reporting and tracking in other places this.InternalError = ExceptionDispatchInfo.Capture(e); @@ -180,22 +179,27 @@ public void ThrowApplicationExceptionsIfAny() } } - public bool ErrorsPresent(out string description) + public bool ErrorsPresent(out string error, out string sanitizedError) { if (this.InternalError != null) { - description = $"Internal error: {this.InternalError.SourceException}"; + error = $"Internal error: {this.InternalError.SourceException}"; + sanitizedError = $"Internal error: {this.InternalError.SourceException.GetType().FullName} \n {this.InternalError.SourceException.StackTrace}"; return true; } else if (this.ApplicationErrors != null) { var messages = this.ApplicationErrors.Select(i => $"({i.SourceException.Message})"); - description = $"One or more operations failed: {string.Concat(messages)}"; + error = $"One or more operations failed: {string.Concat(messages)}"; + + string errorTypes = string.Join(", ", this.ApplicationErrors.Select(i => i.SourceException.GetType().FullName)); + sanitizedError = $"One or more operations failed: {errorTypes}"; return true; } else { - description = string.Empty; + error = string.Empty; + sanitizedError = string.Empty; return false; } } diff --git a/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableOrchestrationContext.cs b/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableOrchestrationContext.cs index 572c1b747..782a4c8b1 100644 --- a/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableOrchestrationContext.cs +++ b/src/WebJobs.Extensions.DurableTask/ContextImplementations/DurableOrchestrationContext.cs @@ -781,7 +781,7 @@ internal async Task CallDurableTaskFunctionAsync( operationId, operationName, input: "(replayed)", - exception: "(replayed)", + exception: exception, duration: 0, isReplay: true); } @@ -791,7 +791,7 @@ internal async Task CallDurableTaskFunctionAsync( this.Config.Options.HubName, functionName, this.InstanceId, - reason: $"(replayed {exception.GetType().Name})", + exception: exception, functionType: functionType, isReplay: true); } @@ -933,7 +933,7 @@ internal void RaiseEvent(string name, string input) FunctionType.Orchestrator, this.InstanceId, name, - this.Config.GetIntputOutputTrace(responseMessage.Result), + responseMessage.Result, this.IsReplaying); } else @@ -943,7 +943,7 @@ internal void RaiseEvent(string name, string input) this.Name, this.InstanceId, name, - this.Config.GetIntputOutputTrace(input), + input, this.IsReplaying); } diff --git a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs index 5a8a50482..9c4d6a02e 100644 --- a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs +++ b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs @@ -64,18 +64,11 @@ internal OrchestratorExecutionResult GetResult() return this.executionResult ?? throw new InvalidOperationException($"The execution result has not yet been set using {nameof(this.SetResult)}."); } - internal bool TryGetOrchestrationErrorDetails(out string details) + internal bool TryGetOrchestrationErrorDetails(out Exception? failure) { - if (this.failure != null) - { - details = this.failure.Message; - return true; - } - else - { - details = string.Empty; - return false; - } + bool hasError = this.failure != null; + failure = hasError ? this.failure : null; + return hasError; } internal void SetResult(IEnumerable actions, string customStatus) diff --git a/src/WebJobs.Extensions.DurableTask/DurableTaskExtension.cs b/src/WebJobs.Extensions.DurableTask/DurableTaskExtension.cs index 17c085a8b..35447f583 100644 --- a/src/WebJobs.Extensions.DurableTask/DurableTaskExtension.cs +++ b/src/WebJobs.Extensions.DurableTask/DurableTaskExtension.cs @@ -155,7 +155,7 @@ public DurableTaskExtension( ILogger logger = loggerFactory.CreateLogger(LoggerCategoryName); - this.TraceHelper = new EndToEndTraceHelper(logger, this.Options.Tracing.TraceReplayEvents); + this.TraceHelper = new EndToEndTraceHelper(logger, this.Options.Tracing.TraceReplayEvents, this.Options.Tracing.TraceInputsAndOutputs); this.LifeCycleNotificationHelper = lifeCycleNotificationHelper ?? this.CreateLifeCycleNotificationHelper(); this.durabilityProviderFactory = GetDurabilityProviderFactory(this.Options, logger, orchestrationServiceFactories); this.defaultDurabilityProvider = this.durabilityProviderFactory.GetDurabilityProvider(); @@ -1038,7 +1038,7 @@ private async Task EntityMiddleware(DispatchMiddlewareContext dispatchContext, F entityContext.HubName, entityContext.Name, entityContext.InstanceId, - this.GetIntputOutputTrace(runtimeState.Input), + runtimeState.Input, FunctionType.Entity, isReplay: false); @@ -1064,13 +1064,14 @@ private async Task EntityMiddleware(DispatchMiddlewareContext dispatchContext, F await next(); // 5. If there were internal or application errors, trace them for DF - if (entityContext.ErrorsPresent(out var description)) + if (entityContext.ErrorsPresent(out string description, out string sanitizedError)) { this.TraceHelper.FunctionFailed( entityContext.HubName, entityContext.Name, entityContext.InstanceId, description, + sanitizedReason: sanitizedError, functionType: FunctionType.Entity, isReplay: false); } @@ -1080,7 +1081,7 @@ private async Task EntityMiddleware(DispatchMiddlewareContext dispatchContext, F entityContext.HubName, entityContext.Name, entityContext.InstanceId, - this.GetIntputOutputTrace(entityContext.State.EntityState), + entityContext.State.EntityState, continuedAsNew: true, functionType: FunctionType.Entity, isReplay: false); @@ -1487,35 +1488,6 @@ bool HasActiveListeners(RegisteredFunctionInfo info) return false; } - internal string GetIntputOutputTrace(string rawInputOutputData) - { - if (this.Options.Tracing.TraceInputsAndOutputs) - { - return rawInputOutputData; - } - else if (rawInputOutputData == null) - { - return "(null)"; - } - else - { - // Azure Storage uses UTF-32 encoding for string payloads - return "(" + Encoding.UTF32.GetByteCount(rawInputOutputData) + " bytes)"; - } - } - - internal string GetExceptionTrace(string rawExceptionData) - { - if (rawExceptionData == null) - { - return "(null)"; - } - else - { - return rawExceptionData; - } - } - /// Task IAsyncConverter.ConvertAsync( HttpRequestMessage request, diff --git a/src/WebJobs.Extensions.DurableTask/EndToEndTraceHelper.cs b/src/WebJobs.Extensions.DurableTask/EndToEndTraceHelper.cs index e730d6e71..8d7e2ddd6 100644 --- a/src/WebJobs.Extensions.DurableTask/EndToEndTraceHelper.cs +++ b/src/WebJobs.Extensions.DurableTask/EndToEndTraceHelper.cs @@ -4,26 +4,31 @@ using System; using System.Diagnostics; using System.Net; +using DurableTask.Core.Common; +using DurableTask.Core.Exceptions; using Microsoft.Extensions.Logging; +#nullable enable namespace Microsoft.Azure.WebJobs.Extensions.DurableTask { internal class EndToEndTraceHelper { private static readonly string ExtensionVersion = FileVersionInfo.GetVersionInfo(typeof(DurableTaskExtension).Assembly.Location).FileVersion; - private static string appName; - private static string slotName; + private static string? appName; + private static string? slotName; private readonly ILogger logger; private readonly bool traceReplayEvents; + private readonly bool shouldTraceRawData; private long sequenceNumber; - public EndToEndTraceHelper(ILogger logger, bool traceReplayEvents) + public EndToEndTraceHelper(ILogger logger, bool traceReplayEvents, bool shouldTraceRawData = false) { this.logger = logger ?? throw new ArgumentNullException(nameof(logger)); this.traceReplayEvents = traceReplayEvents; + this.shouldTraceRawData = shouldTraceRawData; } public static string LocalAppName @@ -54,6 +59,54 @@ public static string LocalSlotName #pragma warning disable SA1117 // Parameters should be on same line or separate lines + internal void SanitizeString(string? rawPayload, out string iloggerString, out string durableKustoTableString) + { + string payload = rawPayload ?? string.Empty; + int numCharacters = payload.Length; + string sanitizedPayload = $"(Redacted {numCharacters} characters)"; + + // By default, both ilogger and kusto data should use the sanitized data + iloggerString = sanitizedPayload; + durableKustoTableString = sanitizedPayload; + + // IFF users opts into tracing raw data, then their ILogger gets the raw data + if (this.shouldTraceRawData) + { + iloggerString = payload; + } + } + + internal void SanitizeException(Exception? exception, out string iloggerExceptionString, out string durableKustoTableString) + { + // default case: exception is null + string rawError = string.Empty; + string sanitizedError = string.Empty; + + // if exception is not null + if (exception != null) + { + // common case if exception is not null + rawError = exception.ToString(); + sanitizedError = $"{exception.GetType().FullName}\n{exception.StackTrace}"; + + // if exception is an OrchestrationFailureException, we need to unravel the details + if (exception is OrchestrationFailureException orchestrationFailureException) + { + rawError = orchestrationFailureException.Details; + } + } + + // By default, both ilogger and kusto data should use the sanitized string + iloggerExceptionString = sanitizedError; + durableKustoTableString = sanitizedError; + + // IFF users opts into tracing raw data, then their ILogger gets the raw exception string + if (this.shouldTraceRawData) + { + iloggerExceptionString = rawError; + } + } + public void ExtensionInformationalEvent( string hubName, string instanceId, @@ -126,11 +179,13 @@ public void FunctionStarting( string hubName, string functionName, string instanceId, - string input, + string? input, FunctionType functionType, bool isReplay, int taskEventId = -1) { + this.SanitizeString(input, out string loggerInput, out string sanitizedInput); + if (this.ShouldLogEvent(isReplay)) { EtwEventSource.Instance.FunctionStarting( @@ -140,14 +195,14 @@ public void FunctionStarting( functionName, taskEventId, instanceId, - input, + sanitizedInput, functionType.ToString(), ExtensionVersion, isReplay); this.logger.LogInformation( "{instanceId}: Function '{functionName} ({functionType})' started. IsReplay: {isReplay}. Input: {input}. State: {state}. RuntimeStatus: {runtimeStatus}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}. TaskEventId: {taskEventId}", - instanceId, functionName, functionType, isReplay, input, FunctionState.Started, OrchestrationRuntimeStatus.Running, hubName, + instanceId, functionName, functionType, isReplay, loggerInput, FunctionState.Started, OrchestrationRuntimeStatus.Running, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++, taskEventId); } } @@ -211,12 +266,14 @@ public void FunctionCompleted( string hubName, string functionName, string instanceId, - string output, + string? output, bool continuedAsNew, FunctionType functionType, bool isReplay, int taskEventId = -1) { + this.SanitizeString(output, out string loggerOutput, out string sanitizedOutput); + if (this.ShouldLogEvent(isReplay)) { EtwEventSource.Instance.FunctionCompleted( @@ -226,7 +283,7 @@ public void FunctionCompleted( functionName, taskEventId, instanceId, - output, + sanitizedOutput, continuedAsNew, functionType.ToString(), ExtensionVersion, @@ -234,37 +291,19 @@ public void FunctionCompleted( this.logger.LogInformation( "{instanceId}: Function '{functionName} ({functionType})' completed. ContinuedAsNew: {continuedAsNew}. IsReplay: {isReplay}. Output: {output}. State: {state}. RuntimeStatus: {runtimeStatus}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}. TaskEventId: {taskEventId}", - instanceId, functionName, functionType, continuedAsNew, isReplay, output, FunctionState.Completed, OrchestrationRuntimeStatus.Completed, hubName, + instanceId, functionName, functionType, continuedAsNew, isReplay, loggerOutput, FunctionState.Completed, OrchestrationRuntimeStatus.Completed, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++, taskEventId); } } - public void ProcessingOutOfProcPayload( - string functionName, - string taskHub, - string instanceId, - string details) - { - EtwEventSource.Instance.ProcessingOutOfProcPayload( - functionName, - taskHub, - LocalAppName, - LocalSlotName, - instanceId, - details, - ExtensionVersion); - - this.logger.LogDebug( - "{instanceId}: Function '{functionName} ({functionType})' returned the following OOProc orchestration state: {details}. : {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, FunctionType.Orchestrator, details, taskHub, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); - } - public void FunctionTerminated( string hubName, string functionName, string instanceId, string reason) { + this.SanitizeString(reason, out string loggerReason, out string sanitizedReason); + FunctionType functionType = FunctionType.Orchestrator; EtwEventSource.Instance.FunctionTerminated( @@ -273,14 +312,14 @@ public void FunctionTerminated( LocalSlotName, functionName, instanceId, - reason, + sanitizedReason, functionType.ToString(), ExtensionVersion, IsReplay: false); this.logger.LogWarning( "{instanceId}: Function '{functionName} ({functionType})' was terminated. Reason: {reason}. State: {state}. RuntimeStatus: {runtimeStatus}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, functionType, reason, FunctionState.Terminated, OrchestrationRuntimeStatus.Terminated, hubName, + instanceId, functionName, functionType, loggerReason, FunctionState.Terminated, OrchestrationRuntimeStatus.Terminated, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } @@ -290,6 +329,8 @@ public void SuspendingOrchestration( string instanceId, string reason) { + this.SanitizeString(reason, out string loggerReason, out string sanitizedReason); + FunctionType functionType = FunctionType.Orchestrator; EtwEventSource.Instance.SuspendingOrchestration( @@ -298,14 +339,14 @@ public void SuspendingOrchestration( LocalSlotName, functionName, instanceId, - reason, + sanitizedReason, functionType.ToString(), ExtensionVersion, IsReplay: false); this.logger.LogInformation( "{instanceId}: Suspending function '{functionName} ({functionType})'. Reason: {reason}. State: {state}. RuntimeStatus: {runtimeStatus}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, functionType, reason, FunctionState.Suspended, OrchestrationRuntimeStatus.Suspended, hubName, + instanceId, functionName, functionType, loggerReason, FunctionState.Suspended, OrchestrationRuntimeStatus.Suspended, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } @@ -315,6 +356,8 @@ public void ResumingOrchestration( string instanceId, string reason) { + this.SanitizeString(reason, out string loggerReason, out string sanitizedReason); + FunctionType functionType = FunctionType.Orchestrator; EtwEventSource.Instance.ResumingOrchestration( @@ -323,14 +366,14 @@ public void ResumingOrchestration( LocalSlotName, functionName, instanceId, - reason, + sanitizedReason, functionType.ToString(), ExtensionVersion, IsReplay: false); this.logger.LogInformation( "{instanceId}: Resuming function '{functionName} ({functionType})'. Reason: {reason}. State: {state}. RuntimeStatus: {runtimeStatus}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, functionType, reason, FunctionState.Scheduled, OrchestrationRuntimeStatus.Running, hubName, + instanceId, functionName, functionType, loggerReason, FunctionState.Scheduled, OrchestrationRuntimeStatus.Running, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } @@ -340,6 +383,8 @@ public void FunctionRewound( string instanceId, string reason) { + this.SanitizeString(reason, out string loggerReason, out string sanitizedReason); + FunctionType functionType = FunctionType.Orchestrator; EtwEventSource.Instance.FunctionRewound( @@ -348,22 +393,36 @@ public void FunctionRewound( LocalSlotName, functionName, instanceId, - reason, + sanitizedReason, functionType.ToString(), ExtensionVersion, IsReplay: false); this.logger.LogWarning( "{instanceId}: Function '{functionName} ({functionType})' was rewound. Reason: {reason}. State: {state}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, functionType, reason, FunctionState.Rewound, hubName, + instanceId, functionName, functionType, loggerReason, FunctionState.Rewound, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } + public void FunctionFailed( + string hubName, + string functionName, + string instanceId, + Exception? exception, + FunctionType functionType, + bool isReplay, + int taskEventId = -1) + { + this.SanitizeException(exception, out string loggerReason, out string sanitizedReason); + this.FunctionFailed(hubName, functionName, instanceId, loggerReason, sanitizedReason, functionType, isReplay, taskEventId); + } + public void FunctionFailed( string hubName, string functionName, string instanceId, string reason, + string sanitizedReason, FunctionType functionType, bool isReplay, int taskEventId = -1) @@ -377,7 +436,7 @@ public void FunctionFailed( functionName, taskEventId, instanceId, - reason, + sanitizedReason, functionType.ToString(), ExtensionVersion, isReplay); @@ -424,6 +483,9 @@ public void OperationCompleted( double duration, bool isReplay) { + this.SanitizeString(input, out string loggerInput, out string sanitizedInput); + this.SanitizeString(output, out string loggerOutput, out string sanitizedOutput); + if (this.ShouldLogEvent(isReplay)) { EtwEventSource.Instance.OperationCompleted( @@ -434,8 +496,8 @@ public void OperationCompleted( instanceId, operationId, operationName, - input, - output, + sanitizedInput, + sanitizedOutput, duration, FunctionType.Entity.ToString(), ExtensionVersion, @@ -443,11 +505,27 @@ public void OperationCompleted( this.logger.LogInformation( "{instanceId}: Function '{functionName} ({functionType})' completed '{operationName}' operation {operationId} in {duration}ms. IsReplay: {isReplay}. Input: {input}. Output: {output}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, FunctionType.Entity, operationName, operationId, duration, isReplay, input, output, + instanceId, functionName, FunctionType.Entity, operationName, operationId, duration, isReplay, loggerInput, loggerOutput, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } } + public void OperationFailed( + string hubName, + string functionName, + string instanceId, + string operationId, + string operationName, + string input, + Exception exception, + double duration, + bool isReplay) + { + this.SanitizeString(input, out string loggerInput, out string sanitizedInput); + this.SanitizeException(exception, out string loggerException, out string sanitizedException); + this.OperationFailed(hubName, functionName, instanceId, operationId, operationName, sanitizedInput, loggerInput, sanitizedException, loggerException, duration, isReplay); + } + public void OperationFailed( string hubName, string functionName, @@ -458,6 +536,24 @@ public void OperationFailed( string exception, double duration, bool isReplay) + { + this.SanitizeString(input, out string loggerInput, out string sanitizedInput); + this.SanitizeString(exception, out string loggerException, out string sanitizedException); + this.OperationFailed(hubName, functionName, instanceId, operationId, operationName, sanitizedInput, loggerInput, sanitizedException, loggerException, duration, isReplay); + } + + private void OperationFailed( + string hubName, + string functionName, + string instanceId, + string operationId, + string operationName, + string sanitizedInput, + string loggerInput, + string sanitizedException, + string loggerException, + double duration, + bool isReplay) { if (this.ShouldLogEvent(isReplay)) { @@ -469,8 +565,8 @@ public void OperationFailed( instanceId, operationId, operationName, - input, - exception, + sanitizedInput, + sanitizedException, duration, FunctionType.Entity.ToString(), ExtensionVersion, @@ -478,7 +574,7 @@ public void OperationFailed( this.logger.LogError( "{instanceId}: Function '{functionName} ({functionType})' failed '{operationName}' operation {operationId} after {duration}ms with exception {exception}. Input: {input}. IsReplay: {isReplay}. HubName: {hubName}. AppName: {appName}. SlotName: {slotName}. ExtensionVersion: {extensionVersion}. SequenceNumber: {sequenceNumber}.", - instanceId, functionName, FunctionType.Entity, operationName, operationId, duration, exception, input, isReplay, hubName, + instanceId, functionName, FunctionType.Entity, operationName, operationId, duration, loggerException, loggerInput, isReplay, hubName, LocalAppName, LocalSlotName, ExtensionVersion, this.sequenceNumber++); } } @@ -491,6 +587,8 @@ public void ExternalEventRaised( string input, bool isReplay) { + this.SanitizeString(input, out string _, out string sanitizedInput); + if (this.ShouldLogEvent(isReplay)) { FunctionType functionType = FunctionType.Orchestrator; @@ -502,7 +600,7 @@ public void ExternalEventRaised( functionName, instanceId, eventName, - input, + sanitizedInput, functionType.ToString(), ExtensionVersion, isReplay); @@ -608,6 +706,8 @@ public void EntityResponseReceived( string result, bool isReplay) { + this.SanitizeString(result, out string _, out string sanitizedResult); + if (this.ShouldLogEvent(isReplay)) { EtwEventSource.Instance.EntityResponseReceived( @@ -617,7 +717,7 @@ public void EntityResponseReceived( functionName, instanceId, operationId, - result, + sanitizedResult, functionType.ToString(), ExtensionVersion, isReplay); @@ -806,9 +906,11 @@ public void EntityBatchFailed( string functionName, string instanceId, string traceFlags, - string details) + Exception error) { FunctionType functionType = FunctionType.Entity; + string details = Utils.IsFatal(error) ? error.GetType().Name : error.ToString(); + string sanitizedDetails = $"{error.GetType().FullName}\n{error.StackTrace}"; EtwEventSource.Instance.EntityBatchFailed( hubName, @@ -817,7 +919,7 @@ public void EntityBatchFailed( functionName, instanceId, traceFlags, - details, + sanitizedDetails, functionType.ToString(), ExtensionVersion); diff --git a/src/WebJobs.Extensions.DurableTask/EtwEventSource.cs b/src/WebJobs.Extensions.DurableTask/EtwEventSource.cs index 77f9bb0e4..019e27a2f 100644 --- a/src/WebJobs.Extensions.DurableTask/EtwEventSource.cs +++ b/src/WebJobs.Extensions.DurableTask/EtwEventSource.cs @@ -1,7 +1,6 @@ // Copyright (c) .NET Foundation. All rights reserved. // Licensed under the MIT License. See LICENSE in the project root for license information. - -using System; +#nullable enable using System.Diagnostics.Tracing; namespace Microsoft.Azure.WebJobs.Extensions.DurableTask @@ -43,7 +42,7 @@ public void FunctionStarting( string FunctionName, int TaskEventId, string InstanceId, - string Input, + string? Input, string FunctionType, string ExtensionVersion, bool IsReplay) @@ -88,7 +87,7 @@ public void ExternalEventRaised( string FunctionName, string InstanceId, string EventName, - string Input, + string? Input, string FunctionType, string ExtensionVersion, bool IsReplay) @@ -104,7 +103,7 @@ public void FunctionCompleted( string FunctionName, int TaskEventId, string InstanceId, - string Output, + string? Output, bool ContinuedAsNew, string FunctionType, string ExtensionVersion, @@ -204,7 +203,7 @@ public void EventGridNotificationException( string SlotName, string FunctionName, FunctionState FunctionState, - string Version, + string? Version, string InstanceId, string Details, string Reason, @@ -222,8 +221,8 @@ public void ExtensionInformationalEvent( string TaskHub, string AppName, string SlotName, - string FunctionName, - string InstanceId, + string? FunctionName, + string? InstanceId, string Details, string ExtensionVersion) { @@ -235,8 +234,8 @@ public void ExtensionWarningEvent( string TaskHub, string AppName, string SlotName, - string FunctionName, - string InstanceId, + string? FunctionName, + string? InstanceId, string Details, string ExtensionVersion) { @@ -265,7 +264,7 @@ public void FunctionRewound( string SlotName, string FunctionName, string InstanceId, - string Reason, + string? Reason, string FunctionType, string ExtensionVersion, bool IsReplay) @@ -297,7 +296,7 @@ public void EntityResponseReceived( string FunctionName, string InstanceId, string OperationId, - string Result, + string? Result, string FunctionType, string ExtensionVersion, bool IsReplay) @@ -313,7 +312,7 @@ public void EntityLockAcquired( string FunctionName, string InstanceId, string RequestingInstanceId, - string RequestingExecutionId, + string? RequestingExecutionId, string RequestId, string FunctionType, string ExtensionVersion, @@ -347,8 +346,8 @@ public void OperationCompleted( string InstanceId, string OperationId, string OperationName, - string Input, - string Output, + string? Input, + string? Output, double Duration, string FunctionType, string ExtensionVersion, @@ -366,7 +365,7 @@ public void OperationFailed( string InstanceId, string OperationId, string OperationName, - string Input, + string? Input, string Exception, double Duration, string FunctionType, diff --git a/src/WebJobs.Extensions.DurableTask/Listener/TaskActivityShim.cs b/src/WebJobs.Extensions.DurableTask/Listener/TaskActivityShim.cs index 9481a0be0..128ed4c44 100644 --- a/src/WebJobs.Extensions.DurableTask/Listener/TaskActivityShim.cs +++ b/src/WebJobs.Extensions.DurableTask/Listener/TaskActivityShim.cs @@ -10,6 +10,7 @@ using Microsoft.Azure.WebJobs.Host; using Microsoft.Azure.WebJobs.Host.Executors; +#nullable enable namespace Microsoft.Azure.WebJobs.Extensions.DurableTask { /// @@ -58,7 +59,7 @@ public override async Task RunAsync(TaskContext context, string rawInput this.config.Options.HubName, this.activityName, instanceId, - this.config.GetIntputOutputTrace(rawInput), + rawInput, functionType: FunctionType.Activity, isReplay: false, taskEventId: this.taskEventId); @@ -76,7 +77,7 @@ public override async Task RunAsync(TaskContext context, string rawInput this.config.Options.HubName, this.activityName, instanceId, - this.config.GetIntputOutputTrace(serializedOutput), + serializedOutput, continuedAsNew: false, functionType: FunctionType.Activity, isReplay: false, @@ -100,7 +101,7 @@ public override async Task RunAsync(TaskContext context, string rawInput case WrappedFunctionResult.FunctionResultStatus.FunctionTimeoutError: // Flow the original activity function exception to the orchestration // without the outer FunctionInvocationException. - Exception exceptionToReport = StripFunctionInvocationException(result.Exception); + Exception? exceptionToReport = StripFunctionInvocationException(result.Exception); if (OutOfProcExceptionHelpers.TryGetExceptionWithFriendlyMessage( exceptionToReport, @@ -113,13 +114,13 @@ public override async Task RunAsync(TaskContext context, string rawInput this.config.Options.HubName, this.activityName, instanceId, - exceptionToReport?.ToString() ?? string.Empty, + exceptionToReport, functionType: FunctionType.Activity, isReplay: false, taskEventId: this.taskEventId); throw new TaskFailureException( - $"Activity function '{this.activityName}' failed: {exceptionToReport.Message}", + $"Activity function '{this.activityName}' failed: {exceptionToReport!.Message}", Utils.SerializeCause(exceptionToReport, this.config.ErrorDataConverter)); default: // we throw a TaskFailureException to ensure deserialization is possible. @@ -143,7 +144,7 @@ internal void SetTaskEventId(int taskEventId) this.taskEventId = taskEventId; } - private static Exception StripFunctionInvocationException(Exception e) + private static Exception? StripFunctionInvocationException(Exception? e) { var infrastructureException = e as FunctionInvocationException; if (infrastructureException?.InnerException != null) diff --git a/src/WebJobs.Extensions.DurableTask/Listener/TaskEntityShim.cs b/src/WebJobs.Extensions.DurableTask/Listener/TaskEntityShim.cs index 1e0998ce4..648a86731 100644 --- a/src/WebJobs.Extensions.DurableTask/Listener/TaskEntityShim.cs +++ b/src/WebJobs.Extensions.DurableTask/Listener/TaskEntityShim.cs @@ -237,7 +237,7 @@ public override async Task Execute(OrchestrationContext innerContext, st this.context.Name, this.context.InstanceId, this.entityTraceInfo.TraceFlags, - this.context.InternalError.ToString()); + this.context.InternalError.SourceException); } else { @@ -533,8 +533,8 @@ private async Task ProcessOperationRequestAsync(RequestMessage request) this.context.InstanceId, request.Id.ToString(), request.Operation, - this.Config.GetIntputOutputTrace(this.context.RawInput), - this.Config.GetIntputOutputTrace(response.Result), + this.context.RawInput, + response.Result, stopwatch.Elapsed.TotalMilliseconds, isReplay: false); } @@ -546,8 +546,8 @@ private async Task ProcessOperationRequestAsync(RequestMessage request) this.context.InstanceId, request.Id.ToString(), request.Operation, - this.Config.GetIntputOutputTrace(this.context.RawInput), - exception.ToString(), + this.context.RawInput, + exception, stopwatch.Elapsed.TotalMilliseconds, isReplay: false); } @@ -638,8 +638,8 @@ private async Task ExecuteOutOfProcBatch() this.context.InstanceId, request.Id.ToString(), request.Operation, - this.Config.GetIntputOutputTrace(request.Input), - this.Config.GetIntputOutputTrace(result.Result), + request.Input, + result.Result, result.DurationInMilliseconds, isReplay: false); } @@ -654,8 +654,8 @@ private async Task ExecuteOutOfProcBatch() this.context.InstanceId, request.Id.ToString(), request.Operation, - this.Config.GetIntputOutputTrace(request.Input), - this.Config.GetIntputOutputTrace(result.Result), + request.Input, + result.Result, result.DurationInMilliseconds, isReplay: false); } diff --git a/src/WebJobs.Extensions.DurableTask/Listener/TaskOrchestrationShim.cs b/src/WebJobs.Extensions.DurableTask/Listener/TaskOrchestrationShim.cs index 8cb6a33ff..3f127dca5 100644 --- a/src/WebJobs.Extensions.DurableTask/Listener/TaskOrchestrationShim.cs +++ b/src/WebJobs.Extensions.DurableTask/Listener/TaskOrchestrationShim.cs @@ -75,7 +75,7 @@ public override async Task Execute(OrchestrationContext innerContext, st this.context.HubName, this.context.Name, this.context.InstanceId, - this.Config.GetIntputOutputTrace(serializedInput), + serializedInput, FunctionType.Orchestrator, this.context.IsReplaying); status = OrchestrationRuntimeStatus.Running; @@ -113,7 +113,7 @@ public override async Task Execute(OrchestrationContext innerContext, st this.context.HubName, this.context.Name, this.context.InstanceId, - this.Config.GetIntputOutputTrace(serializedOutput), + serializedOutput, this.context.ContinuedAsNew, FunctionType.Orchestrator, this.context.IsReplaying); @@ -184,14 +184,14 @@ private async Task InvokeUserCodeAndHandleResults( } catch (OrchestrationFailureException ex) { - this.TraceAndSendExceptionNotification(ex.Details); + this.TraceAndSendExceptionNotification(ex); this.context.OrchestrationException = ExceptionDispatchInfo.Capture(ex); throw ex; } } else { - this.TraceAndSendExceptionNotification(e.ToString()); + this.TraceAndSendExceptionNotification(e); var orchestrationException = new OrchestrationFailureException( $"Orchestrator function '{this.context.Name}' failed: {e.Message}", Utils.SerializeCause(e, innerContext.ErrorDataConverter)); @@ -212,13 +212,19 @@ private async Task InvokeUserCodeAndHandleResults( } } - private void TraceAndSendExceptionNotification(string exceptionDetails) + private void TraceAndSendExceptionNotification(Exception exception) { + string exceptionDetails = exception.Message; + if (exception is OrchestrationFailureException orchestrationFailureException) + { + exceptionDetails = orchestrationFailureException.Details; + } + this.config.TraceHelper.FunctionFailed( this.context.HubName, this.context.Name, this.context.InstanceId, - exceptionDetails, + exception: exception, FunctionType.Orchestrator, this.context.IsReplaying); diff --git a/src/WebJobs.Extensions.DurableTask/Listener/WrappedFunctionResult.cs b/src/WebJobs.Extensions.DurableTask/Listener/WrappedFunctionResult.cs index fade92f13..f758179d2 100644 --- a/src/WebJobs.Extensions.DurableTask/Listener/WrappedFunctionResult.cs +++ b/src/WebJobs.Extensions.DurableTask/Listener/WrappedFunctionResult.cs @@ -3,13 +3,14 @@ using System; +#nullable enable namespace Microsoft.Azure.WebJobs.Extensions.DurableTask.Listener { internal class WrappedFunctionResult { private WrappedFunctionResult( FunctionResultStatus status, - Exception ex) + Exception? ex) { this.Exception = ex; this.ExecutionStatus = status; @@ -24,7 +25,7 @@ internal enum FunctionResultStatus FunctionsHostStoppingError = 4, // host was shutting down; treated as a functions runtime error } - internal Exception Exception { get; } + internal Exception? Exception { get; } internal FunctionResultStatus ExecutionStatus { get; } diff --git a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs index 8a7983cba..4d514c670 100644 --- a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs +++ b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs @@ -95,7 +95,7 @@ public async Task CallOrchestratorAsync(DispatchMiddlewareContext dispatchContex this.Options.HubName, functionName.Name, instance.InstanceId, - isReplaying ? "(replay)" : this.extension.GetIntputOutputTrace(startEvent.Input), + startEvent.Input, FunctionType.Orchestrator, isReplaying); @@ -188,7 +188,7 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync( this.Options.HubName, functionName.Name, instance.InstanceId, - this.extension.GetIntputOutputTrace(context.SerializedOutput), + context.SerializedOutput, context.ContinuedAsNew, FunctionType.Orchestrator, isReplay: false); @@ -214,7 +214,7 @@ await this.LifeCycleNotificationHelper.OrchestratorCompletedAsync( isReplay: false); } } - else if (context.TryGetOrchestrationErrorDetails(out string details)) + else if (context.TryGetOrchestrationErrorDetails(out Exception? exception)) { // the function failed because the orchestrator failed. @@ -224,7 +224,7 @@ await this.LifeCycleNotificationHelper.OrchestratorCompletedAsync( this.Options.HubName, functionName.Name, instance.InstanceId, - details, + exception, FunctionType.Orchestrator, isReplay: false); @@ -232,7 +232,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync( this.Options.HubName, functionName.Name, instance.InstanceId, - details, + exception?.Message ?? string.Empty, isReplay: false); } else @@ -245,7 +245,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync( this.Options.HubName, functionName.Name, instance.InstanceId, - exceptionDetails, + functionResult.Exception, FunctionType.Orchestrator, isReplay: false); @@ -320,7 +320,7 @@ void SetErrorResult(FailureDetails failureDetails) this.Options.HubName, functionName.Name, batchRequest.InstanceId, - this.extension.GetIntputOutputTrace(batchRequest.EntityState), + batchRequest.EntityState, functionType: FunctionType.Entity, isReplay: false); @@ -396,7 +396,7 @@ void SetErrorResult(FailureDetails failureDetails) this.Options.HubName, functionName.Name, batchRequest.InstanceId, - functionResult.Exception.ToString(), + functionResult.Exception, FunctionType.Entity, isReplay: false); @@ -429,7 +429,7 @@ void SetErrorResult(FailureDetails failureDetails) this.Options.HubName, functionName.Name, batchRequest.InstanceId, - this.extension.GetIntputOutputTrace(batchRequest.EntityState), + batchRequest.EntityState, batchResult.EntityState != null, FunctionType.Entity, isReplay: false); @@ -496,7 +496,7 @@ public async Task CallActivityAsync(DispatchMiddlewareContext dispatchContext, F this.Options.HubName, functionName.Name, instance.InstanceId, - this.extension.GetIntputOutputTrace(rawInput), + rawInput, functionType: FunctionType.Activity, isReplay: false, taskEventId: scheduledEvent.EventId); @@ -542,7 +542,7 @@ public async Task CallActivityAsync(DispatchMiddlewareContext dispatchContext, F this.Options.HubName, functionName.Name, instance.InstanceId, - this.extension.GetIntputOutputTrace(serializedOutput), + serializedOutput, continuedAsNew: false, FunctionType.Activity, isReplay: false, @@ -562,7 +562,7 @@ public async Task CallActivityAsync(DispatchMiddlewareContext dispatchContext, F this.Options.HubName, functionName.Name, instance.InstanceId, - result.Exception.ToString(), + result.Exception, FunctionType.Activity, isReplay: false, scheduledEvent.EventId); diff --git a/test/Common/TestHelpers.cs b/test/Common/TestHelpers.cs index 990ba8515..b36480091 100644 --- a/test/Common/TestHelpers.cs +++ b/test/Common/TestHelpers.cs @@ -707,7 +707,7 @@ private static List GetLogs_UnhandledOrchestrationException(string messa var list = new List() { $"{messageId}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' scheduled. Reason: NewInstance. IsReplay: False.", - $"{messageId}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' started. IsReplay: False. Input: (null)", + $"{messageId}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' started. IsReplay: False. Input: ", $"{messageId}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' failed with an error. Reason: System.ArgumentNullException: Value cannot be null.", }; @@ -835,7 +835,7 @@ private static List GetLogs_Orchestration_Activity(string[] messageIds, $"{messageIds[1]}:0: Function '{orchestratorFunctionNames[1]} ({FunctionType.Orchestrator})' completed. ContinuedAsNew: False. IsReplay: False. Output: \"Hello,", $"{messageIds[0]}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' started. IsReplay: True.", $"{messageIds[0]}: Function '{orchestratorFunctionNames[1]} ({FunctionType.Orchestrator})' scheduled. Reason: OrchestratorGreeting. IsReplay: True.", - $"{messageIds[0]}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' completed. ContinuedAsNew: False. IsReplay: False. Output: (null)", + $"{messageIds[0]}: Function '{orchestratorFunctionNames[0]} ({FunctionType.Orchestrator})' completed. ContinuedAsNew: False. IsReplay: False. Output: ", }; return list; diff --git a/test/FunctionsV2/EndToEndTraceHelperTests.cs b/test/FunctionsV2/EndToEndTraceHelperTests.cs new file mode 100644 index 000000000..4ea1b4e06 --- /dev/null +++ b/test/FunctionsV2/EndToEndTraceHelperTests.cs @@ -0,0 +1,108 @@ +// Copyright (c) .NET Foundation. All rights reserved. +// Licensed under the MIT License. See LICENSE in the project root for license information. + +#nullable enable +using System; +using Microsoft.Azure.WebJobs.Extensions.DurableTask; +using Microsoft.Azure.WebJobs.Extensions.DurableTask.Tests; +using Microsoft.Extensions.Logging.Abstractions; +using Xunit; + +namespace WebJobs.Extensions.DurableTask.Tests.V2 +{ + public class EndToEndTraceHelperTests + { + [Theory] + [InlineData(true, "DO NOT LOG ME")] + [InlineData(false, "DO NOT LOG ME")] + [InlineData(true, null)] + [InlineData(false, null)] + [Trait("Category", PlatformSpecificHelpers.TestCategory)] + public void StringSanitizerTest( + bool shouldTraceRawData, + string? possiblySensitiveData) + { + // set up trace helper + var nullLogger = new NullLogger(); + var traceHelper = new EndToEndTraceHelper( + logger: nullLogger, + traceReplayEvents: false, // has not effect on sanitizer + shouldTraceRawData: shouldTraceRawData); + + // run sanitizer + traceHelper.SanitizeString( + rawPayload: possiblySensitiveData, + out string iLoggerString, + out string kustoTableString); + + // expected: sanitized string should not contain the sensitive data + // skip this check if data is null + if (possiblySensitiveData != null) + { + Assert.DoesNotContain(possiblySensitiveData, kustoTableString); + } + + if (shouldTraceRawData) + { + string expectedString = possiblySensitiveData ?? string.Empty; + Assert.Equal(expectedString, iLoggerString); + } + else + { + // If raw data is not being traced, + // kusto and the ilogger should get the same data + Assert.Equal(iLoggerString, kustoTableString); + } + } + + [Theory] + [InlineData(true, "DO NOT LOG ME")] + [InlineData(false, "DO NOT LOG ME")] + [InlineData(true, null)] + [InlineData(false, null)] + [Trait("Category", PlatformSpecificHelpers.TestCategory)] + public void ExceptionSanitizerTest( + bool shouldTraceRawData, + string? possiblySensitiveData) + { + // set up trace helper + var nullLogger = new NullLogger(); + var traceHelper = new EndToEndTraceHelper( + logger: nullLogger, + traceReplayEvents: false, // has not effect on sanitizer + shouldTraceRawData: shouldTraceRawData); + + // exception to sanitize + Exception? exception = null; + if (possiblySensitiveData != null) + { + exception = new Exception(possiblySensitiveData); + } + + // run sanitizer + traceHelper.SanitizeException( + exception: exception, + out string iLoggerString, + out string kustoTableString); + + // exception message should not be part of the sanitized strings + // skip this check if data is null + if (possiblySensitiveData != null) + { + Assert.DoesNotContain(possiblySensitiveData, kustoTableString); + } + + if (shouldTraceRawData) + { + var expectedString = exception?.ToString() ?? string.Empty; + Assert.Equal(expectedString, iLoggerString); + } + else + { + // If raw data is not being traced, + // kusto and the ilogger should get the same data + Assert.Equal(iLoggerString, kustoTableString); + } + } + } +}