From fc666f84f8359b6bc0edb0c36c6251106978dc98 Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Tue, 12 Mar 2024 16:57:12 +0100 Subject: [PATCH 1/8] Optimize runtime metrics --- .../RuntimeMetrics/PssRuntimeInformation.cs | 222 ++++++++++++++++++ .../RuntimeMetrics/RuntimeMetricsWriter.cs | 26 ++ .../RuntimeMetricsWriterTests.cs | 61 +++++ 3 files changed, 309 insertions(+) create mode 100644 tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs new file mode 100644 index 000000000000..5346fc0d9a92 --- /dev/null +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs @@ -0,0 +1,222 @@ +// +// Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. +// This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. +// + +using System; +using System.ComponentModel; +using System.Runtime.InteropServices; +using Datadog.Trace.Logging; + +namespace Datadog.Trace.RuntimeMetrics; + +// ReSharper disable InconsistentNaming UnusedMember.Local +internal class PssRuntimeInformation +{ + private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); + + private enum PSS_PROCESS_FLAGS + { + PSS_PROCESS_FLAGS_NONE = 0x00000000, + PSS_PROCESS_FLAGS_PROTECTED = 0x00000001, + PSS_PROCESS_FLAGS_WOW64 = 0x00000002, + PSS_PROCESS_FLAGS_RESERVED_03 = 0x00000004, + PSS_PROCESS_FLAGS_RESERVED_04 = 0x00000008, + PSS_PROCESS_FLAGS_FROZEN = 0x00000010 + } + + private enum PSS_QUERY_INFORMATION_CLASS + { + PSS_QUERY_PROCESS_INFORMATION = 0, + PSS_QUERY_VA_CLONE_INFORMATION = 1, + PSS_QUERY_AUXILIARY_PAGES_INFORMATION = 2, + PSS_QUERY_VA_SPACE_INFORMATION = 3, + PSS_QUERY_HANDLE_INFORMATION = 4, + PSS_QUERY_THREAD_INFORMATION = 5, + PSS_QUERY_HANDLE_TRACE_INFORMATION = 6, + PSS_QUERY_PERFORMANCE_COUNTERS = 7 + } + + [Flags] + private enum PSS_CAPTURE_FLAGS : uint + { + PSS_CAPTURE_NONE = 0x00000000, + PSS_CAPTURE_VA_CLONE = 0x00000001, + PSS_CAPTURE_RESERVED_00000002 = 0x00000002, + PSS_CAPTURE_HANDLES = 0x00000004, + PSS_CAPTURE_HANDLE_NAME_INFORMATION = 0x00000008, + PSS_CAPTURE_HANDLE_BASIC_INFORMATION = 0x00000010, + PSS_CAPTURE_HANDLE_TYPE_SPECIFIC_INFORMATION = 0x00000020, + PSS_CAPTURE_HANDLE_TRACE = 0x00000040, + PSS_CAPTURE_THREADS = 0x00000080, + PSS_CAPTURE_THREAD_CONTEXT = 0x00000100, + PSS_CAPTURE_THREAD_CONTEXT_EXTENDED = 0x00000200, + PSS_CAPTURE_RESERVED_00000400 = 0x00000400, + PSS_CAPTURE_VA_SPACE = 0x00000800, + PSS_CAPTURE_VA_SPACE_SECTION_INFORMATION = 0x00001000, + PSS_CREATE_BREAKAWAY_OPTIONAL = 0x04000000, + PSS_CREATE_BREAKAWAY = 0x08000000, + PSS_CREATE_FORCE_BREAKAWAY = 0x10000000, + PSS_CREATE_USE_VM_ALLOCATIONS = 0x20000000, + PSS_CREATE_MEASURE_PERFORMANCE = 0x40000000, + PSS_CREATE_RELEASE_SECTION = 0x80000000 + } + + private static IntPtr CurrentProcessHandle => new(-1); + + public static unsafe bool GetCurrentProcessMetrics(out TimeSpan userProcessorTime, out TimeSpan systemCpuTime, out int threadCount, out long privateMemorySize) + { + var snapshotHandle = IntPtr.Zero; + + try + { + var result = PssCaptureSnapshot(CurrentProcessHandle, PSS_CAPTURE_FLAGS.PSS_CAPTURE_THREADS, 0, out snapshotHandle); + + if (result != 0) + { + throw new Win32Exception(result, $"PssCaptureSnapshot failed with code {result}"); + } + + PSS_THREAD_INFORMATION threadInformation = default; + + result = PssQuerySnapshot(snapshotHandle, PSS_QUERY_INFORMATION_CLASS.PSS_QUERY_THREAD_INFORMATION, &threadInformation, Marshal.SizeOf()); + + if (result != 0) + { + throw new Win32Exception(result, $"PssQuerySnapshot with PSS_QUERY_THREAD_INFORMATION failed with code {result}"); + } + + threadCount = threadInformation.ThreadsCaptured; + + long userTime; + long kernelTime; + long memoryUsage; + + if (Environment.Is64BitProcess) + { + PSS_PROCESS_INFORMATION_64 processInformation; + + result = PssQuerySnapshot(snapshotHandle, PSS_QUERY_INFORMATION_CLASS.PSS_QUERY_PROCESS_INFORMATION, &processInformation, Marshal.SizeOf()); + + if (result != 0) + { + throw new Win32Exception(result, $"PssQuerySnapshot with PSS_QUERY_PROCESS_INFORMATION (64 bits) failed with code {result}"); + } + + userTime = processInformation.UserTime; + kernelTime = processInformation.KernelTime; + memoryUsage = (long)processInformation.PrivateUsage; + } + else + { + PSS_PROCESS_INFORMATION_32 processInformation; + + result = PssQuerySnapshot(snapshotHandle, PSS_QUERY_INFORMATION_CLASS.PSS_QUERY_PROCESS_INFORMATION, &processInformation, Marshal.SizeOf()); + + if (result != 0) + { + throw new Win32Exception(result, $"PssQuerySnapshot with PSS_QUERY_PROCESS_INFORMATION (32 bits) failed with code {result}"); + } + + userTime = processInformation.UserTime; + kernelTime = processInformation.KernelTime; + memoryUsage = (long)processInformation.PrivateUsage; + } + + userProcessorTime = TimeSpan.FromTicks(userTime); + systemCpuTime = TimeSpan.FromTicks(kernelTime); + privateMemorySize = memoryUsage; + return true; + } + finally + { + if (snapshotHandle != IntPtr.Zero) + { + var result = PssFreeSnapshot(CurrentProcessHandle, snapshotHandle); + + if (result != 0) + { + Log.Error("PssFreeSnapshot returned an error, the tracer might be leaking memory. Handle: {Handle}. Error code: {Result}.", snapshotHandle, result); + } + } + } + } + + [DllImport("kernel32.dll")] + private static extern int PssCaptureSnapshot(IntPtr processHandle, PSS_CAPTURE_FLAGS captureFlags, int threadContextFlags, out IntPtr snapshotHandle); + + [DllImport("kernel32.dll")] + private static extern int PssFreeSnapshot(IntPtr processHandle, IntPtr snapshotHandle); + + [DllImport("kernel32.dll")] + private static extern unsafe int PssQuerySnapshot(IntPtr snapshotHandle, PSS_QUERY_INFORMATION_CLASS informationClass, void* buffer, int bufferLength); + + [StructLayout(LayoutKind.Sequential)] + private struct PSS_THREAD_INFORMATION + { + public int ThreadsCaptured; + public int ContextLength; + } + + [StructLayout(LayoutKind.Sequential, Pack = 8)] + private unsafe struct PSS_PROCESS_INFORMATION_64 + { + public uint ExitStatus; + public IntPtr PebBaseAddress; + public nint AffinityMask; + public int BasePriority; + public uint ProcessId; + public uint ParentProcessId; + public PSS_PROCESS_FLAGS Flags; + public long CreateTime; + public long ExitTime; + public long KernelTime; + public long UserTime; + public uint PriorityClass; + public nint PeakVirtualSize; + public nint VirtualSize; + public uint PageFaultCount; + public nint PeakWorkingSetSize; + public nint WorkingSetSize; + public nint QuotaPeakPagedPoolUsage; + public nint QuotaPagedPoolUsage; + public nint QuotaPeakNonPagedPoolUsage; + public nint QuotaNonPagedPoolUsage; + public nint PagefileUsage; + public nint PeakPagefileUsage; + public nuint PrivateUsage; + public uint ExecuteFlags; + public fixed char ImageFileName[260]; + } + + [StructLayout(LayoutKind.Sequential, Pack = 4)] + private unsafe struct PSS_PROCESS_INFORMATION_32 + { + public uint ExitStatus; + public IntPtr PebBaseAddress; + public nint AffinityMask; + public int BasePriority; + public uint ProcessId; + public uint ParentProcessId; + public PSS_PROCESS_FLAGS Flags; + public long CreateTime; + public long ExitTime; + public long KernelTime; + public long UserTime; + public uint PriorityClass; + public nuint PeakVirtualSize; + public nuint VirtualSize; + public uint PageFaultCount; + public nuint PeakWorkingSetSize; + public nuint WorkingSetSize; + public nint QuotaPeakPagedPoolUsage; + public nint QuotaPagedPoolUsage; + public nint QuotaPeakNonPagedPoolUsage; + public nint QuotaNonPagedPoolUsage; + public nint PagefileUsage; + public nint PeakPagefileUsage; + public nuint PrivateUsage; + public uint ExecuteFlags; + public fixed char ImageFileName[260]; + } +} diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs index f9a86d1faf42..66dcd63e2c48 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs @@ -25,9 +25,13 @@ internal class RuntimeMetricsWriter : IDisposable private const string ProcessMetrics = $"{MetricsNames.ThreadsCount}, {MetricsNames.CommittedMemory}, {MetricsNames.CpuUserTime}, {MetricsNames.CpuSystemTime}, {MetricsNames.CpuPercentage}"; #endif + private static readonly Version Windows81Version = new(6, 3, 9600); + private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); private static readonly Func InitializeListenerFunc = InitializeListener; + private static int _pssConsecutiveFailures; + private readonly Process _process; private readonly TimeSpan _delay; @@ -233,6 +237,28 @@ private void FirstChanceException(object sender, FirstChanceExceptionEventArgs e private void GetCurrentProcessMetrics(out TimeSpan userProcessorTime, out TimeSpan systemCpuTime, out int threadCount, out long privateMemorySize) { + if (_pssConsecutiveFailures < 3 && Environment.OSVersion.Platform == PlatformID.Win32NT && Environment.OSVersion.Version > Windows81Version) + { + try + { + PssRuntimeInformation.GetCurrentProcessMetrics(out userProcessorTime, out systemCpuTime, out threadCount, out privateMemorySize); + _pssConsecutiveFailures = 0; + } + catch + { + _pssConsecutiveFailures += 1; + + if (_pssConsecutiveFailures >= 3) + { + Log.Error("Pss failed 3 times in a row, falling back to the Process API"); + } + + throw; + } + + return; + } + _process.Refresh(); userProcessorTime = _process.UserProcessorTime; systemCpuTime = _process.PrivilegedProcessorTime; diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs index 1985a0f426ce..dbd0dcc36907 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs @@ -4,10 +4,13 @@ // using System; +using System.Diagnostics; using System.Linq; using System.Threading; +using System.Threading.Tasks; using Datadog.Trace.RuntimeMetrics; using Datadog.Trace.Vendors.StatsdClient; +using FluentAssertions; using Moq; using Xunit; @@ -100,6 +103,64 @@ public void ShouldCaptureFirstChanceExceptions() } } + [Fact] + public async Task ShouldCaptureProcessMetrics() + { + var statsd = new Mock(); + var listener = new Mock(); + + using (new RuntimeMetricsWriter(statsd.Object, TimeSpan.FromSeconds(1), false, (_, _, _) => listener.Object)) + { + var expectedNumberOfThreads = Process.GetCurrentProcess().Threads.Count; + + var tcs = new TaskCompletionSource(); + + double? actualNumberOfThreads = null; + double? userCpuTime = null; + double? kernelCpuTime = null; + double? memoryUsage = null; + + statsd.Setup(s => s.Gauge(MetricsNames.ThreadsCount, It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, value, _, _) => actualNumberOfThreads = value); + + statsd.Setup(s => s.Gauge(MetricsNames.CommittedMemory, It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, value, _, _) => memoryUsage = value); + + statsd.Setup(s => s.Gauge(MetricsNames.CpuUserTime, It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, value, _, _) => userCpuTime = value); + + statsd.Setup(s => s.Gauge(MetricsNames.CpuSystemTime, It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, value, _, _) => kernelCpuTime = value); + + // CPU percentage is the last pushed event + statsd.Setup(s => s.Gauge(MetricsNames.CpuPercentage, It.IsAny(), It.IsAny(), It.IsAny())) + .Callback((_, _, _, _) => tcs.TrySetResult(true)); + + var timeout = Task.Delay(TimeSpan.FromSeconds(30)); + + await Task.WhenAny(tcs.Task, timeout); + + tcs.Task.IsCompleted.Should().BeTrue(); + + actualNumberOfThreads.Should().NotBeNull(); + + // To future generations: if 10 is not enough, feel free to bump it up. We're really just checking that the value is "realistic". + actualNumberOfThreads.Should() + .NotBeNull().And.BeGreaterThan(0).And.BeInRange(expectedNumberOfThreads - 10, expectedNumberOfThreads + 10); + + // CPU time and memory usage can vary wildly, so don't try too hard to validate + userCpuTime.Should().NotBeNull().And.BeGreaterThan(0); + + // Unfortunately we can't guarantee that the process will be eating kernel time, so greater or equal + kernelCpuTime.Should().NotBeNull().And.BeGreaterThanOrEqualTo(0); + + // Between 10MB and 100GB seems realistic. + // If in the future the tests runner really get below 10MB, congratulations! + // If it gets above 100GB, God save us all. + memoryUsage.Should().NotBeNull().And.BeInRange(10.0 * 1024 * 1024, 100.0 * 1024 * 1024 * 1024); + } + } + [Fact] public void CleanupResources() { From 395b7bbd46bd4a228f0fdbdc7f9f1ee83acf1916 Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Tue, 12 Mar 2024 17:22:45 +0100 Subject: [PATCH 2/8] Enable nullability annotations --- .../src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs index 5346fc0d9a92..ae80333ef656 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs @@ -3,6 +3,8 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. // +#nullable enable + using System; using System.ComponentModel; using System.Runtime.InteropServices; @@ -62,6 +64,8 @@ private enum PSS_CAPTURE_FLAGS : uint PSS_CREATE_RELEASE_SECTION = 0x80000000 } + // The value of the current process handle on Windows is hardcoded to 1 + // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getcurrentprocess#remarks private static IntPtr CurrentProcessHandle => new(-1); public static unsafe bool GetCurrentProcessMetrics(out TimeSpan userProcessorTime, out TimeSpan systemCpuTime, out int threadCount, out long privateMemorySize) From 0fe295ca7c0793caa050f01d8a9e3211e76e452b Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Tue, 12 Mar 2024 19:20:00 +0100 Subject: [PATCH 3/8] Address PR feedback and update snapshots --- ...s => ProcessSnapshotRuntimeInformation.cs} | 30 +++++++++---------- .../RuntimeMetrics/RuntimeMetricsWriter.cs | 2 +- ...ferencesHaveNotChanged.net6.0.verified.txt | 1 + ...sHaveNotChanged.netcoreapp3.1.verified.txt | 1 + 4 files changed, 18 insertions(+), 16 deletions(-) rename tracer/src/Datadog.Trace/RuntimeMetrics/{PssRuntimeInformation.cs => ProcessSnapshotRuntimeInformation.cs} (92%) diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs similarity index 92% rename from tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs rename to tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs index ae80333ef656..c63bae2b99ad 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/PssRuntimeInformation.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs @@ -1,4 +1,4 @@ -// +// // Unless explicitly stated otherwise all files in this repository are licensed under the Apache 2 License. // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2017 Datadog, Inc. // @@ -13,9 +13,9 @@ namespace Datadog.Trace.RuntimeMetrics; // ReSharper disable InconsistentNaming UnusedMember.Local -internal class PssRuntimeInformation +internal class ProcessSnapshotRuntimeInformation { - private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); + private static readonly IDatadogLogger Log = DatadogLogging.GetLoggerFor(); private enum PSS_PROCESS_FLAGS { @@ -107,8 +107,8 @@ public static unsafe bool GetCurrentProcessMetrics(out TimeSpan userProcessorTim throw new Win32Exception(result, $"PssQuerySnapshot with PSS_QUERY_PROCESS_INFORMATION (64 bits) failed with code {result}"); } - userTime = processInformation.UserTime; - kernelTime = processInformation.KernelTime; + userTime = (long)processInformation.UserTime; + kernelTime = (long)processInformation.KernelTime; memoryUsage = (long)processInformation.PrivateUsage; } else @@ -122,8 +122,8 @@ public static unsafe bool GetCurrentProcessMetrics(out TimeSpan userProcessorTim throw new Win32Exception(result, $"PssQuerySnapshot with PSS_QUERY_PROCESS_INFORMATION (32 bits) failed with code {result}"); } - userTime = processInformation.UserTime; - kernelTime = processInformation.KernelTime; + userTime = (long)processInformation.UserTime; + kernelTime = (long)processInformation.KernelTime; memoryUsage = (long)processInformation.PrivateUsage; } @@ -172,10 +172,10 @@ private unsafe struct PSS_PROCESS_INFORMATION_64 public uint ProcessId; public uint ParentProcessId; public PSS_PROCESS_FLAGS Flags; - public long CreateTime; - public long ExitTime; - public long KernelTime; - public long UserTime; + public ulong CreateTime; + public ulong ExitTime; + public ulong KernelTime; + public ulong UserTime; public uint PriorityClass; public nint PeakVirtualSize; public nint VirtualSize; @@ -203,10 +203,10 @@ private unsafe struct PSS_PROCESS_INFORMATION_32 public uint ProcessId; public uint ParentProcessId; public PSS_PROCESS_FLAGS Flags; - public long CreateTime; - public long ExitTime; - public long KernelTime; - public long UserTime; + public ulong CreateTime; + public ulong ExitTime; + public ulong KernelTime; + public ulong UserTime; public uint PriorityClass; public nuint PeakVirtualSize; public nuint VirtualSize; diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs index 66dcd63e2c48..3b48cb19ff3a 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs @@ -241,7 +241,7 @@ private void GetCurrentProcessMetrics(out TimeSpan userProcessorTime, out TimeSp { try { - PssRuntimeInformation.GetCurrentProcessMetrics(out userProcessorTime, out systemCpuTime, out threadCount, out privateMemorySize); + ProcessSnapshotRuntimeInformation.GetCurrentProcessMetrics(out userProcessorTime, out systemCpuTime, out threadCount, out privateMemorySize); _pssConsecutiveFailures = 0; } catch diff --git a/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.net6.0.verified.txt b/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.net6.0.verified.txt index 8bf969baacac..75a15feab326 100644 --- a/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.net6.0.verified.txt +++ b/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.net6.0.verified.txt @@ -6,6 +6,7 @@ Microsoft.AspNetCore.Routing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=a Microsoft.AspNetCore.Routing.Abstractions, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 Microsoft.Extensions.Primitives, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 Microsoft.Net.Http.Headers, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 +Microsoft.Win32.Primitives, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections.Concurrent, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections.NonGeneric, Version=6.0.0.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a diff --git a/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.netcoreapp3.1.verified.txt b/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.netcoreapp3.1.verified.txt index c0cbe7399a81..9bac8f2d9802 100644 --- a/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.netcoreapp3.1.verified.txt +++ b/tracer/test/Datadog.Trace.Tests/Snapshots/PublicApiTests.Datadog.Trace.AssemblyReferencesHaveNotChanged.netcoreapp3.1.verified.txt @@ -6,6 +6,7 @@ Microsoft.AspNetCore.Routing, Version=2.0.0.0, Culture=neutral, PublicKeyToken=a Microsoft.AspNetCore.Routing.Abstractions, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 Microsoft.Extensions.Primitives, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 Microsoft.Net.Http.Headers, Version=2.0.0.0, Culture=neutral, PublicKeyToken=adb9793829ddae60 +Microsoft.Win32.Primitives, Version=4.1.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections, Version=4.1.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections.Concurrent, Version=4.0.15.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a System.Collections.NonGeneric, Version=4.1.2.0, Culture=neutral, PublicKeyToken=b03f5f7f11d50a3a From c42fe0ba8b655c88869202fbb1796e5cb829cc2b Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Wed, 13 Mar 2024 13:27:11 +0100 Subject: [PATCH 4/8] Update trimming, try fixing tests --- .../build/Datadog.Trace.Trimming.xml | 5 +++++ .../RuntimeMetrics/RuntimeMetricsWriterTests.cs | 9 +++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml index 0d5f86ea299b..364182d80f1b 100644 --- a/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml +++ b/tracer/src/Datadog.Trace.Trimming/build/Datadog.Trace.Trimming.xml @@ -96,6 +96,9 @@ + + + @@ -713,6 +716,7 @@ + @@ -733,6 +737,7 @@ + diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs index dbd0dcc36907..53f9dec33146 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs @@ -9,6 +9,7 @@ using System.Threading; using System.Threading.Tasks; using Datadog.Trace.RuntimeMetrics; +using Datadog.Trace.TestHelpers; using Datadog.Trace.Vendors.StatsdClient; using FluentAssertions; using Moq; @@ -106,6 +107,10 @@ public void ShouldCaptureFirstChanceExceptions() [Fact] public async Task ShouldCaptureProcessMetrics() { + // This test is specifically targeting process metrics collected with PSS + SkipOn.Platform(SkipOn.PlatformValue.Linux); + SkipOn.Platform(SkipOn.PlatformValue.MacOs); + var statsd = new Mock(); var listener = new Mock(); @@ -144,9 +149,9 @@ public async Task ShouldCaptureProcessMetrics() actualNumberOfThreads.Should().NotBeNull(); - // To future generations: if 10 is not enough, feel free to bump it up. We're really just checking that the value is "realistic". + // To future generations: if 50 is not enough, feel free to bump it up. We're really just checking that the value is "realistic". actualNumberOfThreads.Should() - .NotBeNull().And.BeGreaterThan(0).And.BeInRange(expectedNumberOfThreads - 10, expectedNumberOfThreads + 10); + .NotBeNull().And.BeGreaterThan(0).And.BeInRange(expectedNumberOfThreads - 50, expectedNumberOfThreads + 50); // CPU time and memory usage can vary wildly, so don't try too hard to validate userCpuTime.Should().NotBeNull().And.BeGreaterThan(0); From 3a3095386c91be3a8dd4fa72d53b0a9a1c17abcf Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Wed, 13 Mar 2024 13:55:21 +0100 Subject: [PATCH 5/8] Update tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs Co-authored-by: Andrew Lock --- .../RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs index c63bae2b99ad..d8930dce491c 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/ProcessSnapshotRuntimeInformation.cs @@ -64,7 +64,7 @@ private enum PSS_CAPTURE_FLAGS : uint PSS_CREATE_RELEASE_SECTION = 0x80000000 } - // The value of the current process handle on Windows is hardcoded to 1 + // The value of the current process handle on Windows is hardcoded to -1 // https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getcurrentprocess#remarks private static IntPtr CurrentProcessHandle => new(-1); From edcff7109927a27f0217149ffe9f4dac4d601b84 Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Wed, 13 Mar 2024 15:49:56 +0100 Subject: [PATCH 6/8] Bump the number of threads, make test skippable --- .../RuntimeMetrics/RuntimeMetricsWriterTests.cs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs index 53f9dec33146..2d1b7e740ec1 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs @@ -104,7 +104,7 @@ public void ShouldCaptureFirstChanceExceptions() } } - [Fact] + [SkippableFact] public async Task ShouldCaptureProcessMetrics() { // This test is specifically targeting process metrics collected with PSS @@ -149,9 +149,8 @@ public async Task ShouldCaptureProcessMetrics() actualNumberOfThreads.Should().NotBeNull(); - // To future generations: if 50 is not enough, feel free to bump it up. We're really just checking that the value is "realistic". - actualNumberOfThreads.Should() - .NotBeNull().And.BeGreaterThan(0).And.BeInRange(expectedNumberOfThreads - 50, expectedNumberOfThreads + 50); + // To future generations: if 100 is not enough, feel free to bump it up. We're really just checking that the value is "realistic". + actualNumberOfThreads.Should().NotBeNull().And.BeGreaterThan(0).And.BeInRange(expectedNumberOfThreads - 100, expectedNumberOfThreads + 100); // CPU time and memory usage can vary wildly, so don't try too hard to validate userCpuTime.Should().NotBeNull().And.BeGreaterThan(0); From 6d94c9b9ba33ca467c4e1c7b8f1ae13cc7e5e9f0 Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Wed, 13 Mar 2024 17:33:05 +0100 Subject: [PATCH 7/8] Force to use a bit of CPU --- .../RuntimeMetrics/RuntimeMetricsWriterTests.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs index 2d1b7e740ec1..5a4e05cc8faa 100644 --- a/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs +++ b/tracer/test/Datadog.Trace.Tests/RuntimeMetrics/RuntimeMetricsWriterTests.cs @@ -141,6 +141,14 @@ public async Task ShouldCaptureProcessMetrics() statsd.Setup(s => s.Gauge(MetricsNames.CpuPercentage, It.IsAny(), It.IsAny(), It.IsAny())) .Callback((_, _, _, _) => tcs.TrySetResult(true)); + // Spin a bit to eat CPU + var sw = System.Diagnostics.Stopwatch.StartNew(); + + while (sw.Elapsed < TimeSpan.FromMilliseconds(50)) + { + Thread.SpinWait(10); + } + var timeout = Task.Delay(TimeSpan.FromSeconds(30)); await Task.WhenAny(tcs.Task, timeout); From bcc26b0e8321e2f90ec94ecca7ec12531886eeaf Mon Sep 17 00:00:00 2001 From: Kevin Gosse Date: Fri, 15 Mar 2024 10:00:02 +0100 Subject: [PATCH 8/8] Use Interlocked.Increment --- .../Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs index 3b48cb19ff3a..ce743275f03b 100644 --- a/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs +++ b/tracer/src/Datadog.Trace/RuntimeMetrics/RuntimeMetricsWriter.cs @@ -10,8 +10,6 @@ using System.Runtime.ExceptionServices; using System.Threading; using Datadog.Trace.Logging; -using Datadog.Trace.PlatformHelpers; -using Datadog.Trace.Util; using Datadog.Trace.Vendors.StatsdClient; namespace Datadog.Trace.RuntimeMetrics @@ -246,9 +244,9 @@ private void GetCurrentProcessMetrics(out TimeSpan userProcessorTime, out TimeSp } catch { - _pssConsecutiveFailures += 1; + var consecutiveFailures = Interlocked.Increment(ref _pssConsecutiveFailures); - if (_pssConsecutiveFailures >= 3) + if (consecutiveFailures >= 3) { Log.Error("Pss failed 3 times in a row, falling back to the Process API"); }