Skip to content

Commit 47ee000

Browse files
evgenyfedorov2 authored and committed
Update
1 parent ecc6d5d commit 47ee000

File tree

4 files changed: 48 additions and 48 deletions.

src/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring/Linux/LinuxUtilizationProvider.cs

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
using System;
55
using System.Collections.Generic;
66
using System.Diagnostics.Metrics;
7-
using System.Linq;
87
using System.Threading;
98
using Microsoft.Extensions.Logging;
109
using Microsoft.Extensions.Logging.Abstractions;
@@ -17,6 +16,7 @@ internal sealed class LinuxUtilizationProvider : ISnapshotProvider
1716
{
1817
private const double One = 1.0;
1918
private const long Hundred = 100L;
19+
private const double NanosecondsInSecond = 1_000_000_000;
2020

2121
private readonly object _cpuLocker = new();
2222
private readonly object _memoryLocker = new();
@@ -82,14 +82,19 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
8282
(_previousCgroupCpuTime, _previousCgroupCpuPeriodCounter) = _parser.GetCgroupCpuUsageInNanosecondsAndCpuPeriodsV2();
8383

8484
_ = meter.CreateObservableGauge(
85-
ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
86-
() => GetMeasurementWithRetry(() => CpuUtilizationLimit(cpuLimit)),
87-
"1");
85+
name: ResourceUtilizationInstruments.ContainerCpuLimitUtilization,
86+
observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationLimit(cpuLimit)),
87+
unit: "1");
8888

8989
_ = meter.CreateObservableGauge(
9090
name: ResourceUtilizationInstruments.ContainerCpuRequestUtilization,
9191
observeValues: () => GetMeasurementWithRetry(() => CpuUtilizationRequest(cpuRequest)),
9292
unit: "1");
93+
94+
_ = meter.CreateObservableGauge(
95+
name: ResourceUtilizationInstruments.ContainerCpuTime,
96+
observeValues: GetCpuTime,
97+
unit: "1");
9398
}
9499
else
95100
{
@@ -111,12 +116,12 @@ public LinuxUtilizationProvider(IOptions<ResourceMonitoringOptions> options, ILi
111116

112117
_ = meter.CreateObservableGauge(
113118
name: ResourceUtilizationInstruments.ContainerMemoryLimitUtilization,
114-
observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()),
119+
observeValues: () => GetMeasurementWithRetry(MemoryUtilization),
115120
unit: "1");
116121

117122
_ = meter.CreateObservableGauge(
118123
name: ResourceUtilizationInstruments.ProcessMemoryUtilization,
119-
observeValues: () => GetMeasurementWithRetry(() => MemoryUtilization()),
124+
observeValues: () => GetMeasurementWithRetry(MemoryUtilization),
120125
unit: "1");
121126

122127
// cpuRequest is the CPU request (aka the guaranteed number of CPU units) for the pod; for the host it's 1 core
@@ -259,23 +264,32 @@ public Snapshot GetSnapshot()
259264
memoryUsageInBytes: memoryUsed);
260265
}
261266

262-
private IEnumerable<Measurement<double>> GetMeasurementWithRetry(Func<double> func)
267+
private Measurement<double>[] GetMeasurementWithRetry(Func<double> func)
268+
{
269+
if (!TryGetValueWithRetry(func, out double value))
270+
{
271+
return Array.Empty<Measurement<double>>();
272+
}
273+
274+
return new[] { new Measurement<double>(value) };
275+
}
276+
277+
private bool TryGetValueWithRetry<T>(Func<T> func, out T value)
278+
where T : struct
263279
{
280+
value = default;
264281
if (Volatile.Read(ref _measurementsUnavailable) == 1 &&
265282
_timeProvider.GetUtcNow() - _lastFailure < _retryInterval)
266283
{
267-
return Enumerable.Empty<Measurement<double>>();
284+
return false;
268285
}
269286

270287
try
271288
{
272-
double result = func();
273-
if (Volatile.Read(ref _measurementsUnavailable) == 1)
274-
{
275-
_ = Interlocked.Exchange(ref _measurementsUnavailable, 0);
276-
}
289+
value = func();
290+
_ = Interlocked.CompareExchange(ref _measurementsUnavailable, 0, 1);
277291

278-
return new[] { new Measurement<double>(result) };
292+
return true;
279293
}
280294
catch (Exception ex) when (
281295
ex is System.IO.FileNotFoundException ||
@@ -285,7 +299,7 @@ ex is System.IO.DirectoryNotFoundException ||
285299
_lastFailure = _timeProvider.GetUtcNow();
286300
_ = Interlocked.Exchange(ref _measurementsUnavailable, 1);
287301

288-
return Enumerable.Empty<Measurement<double>>();
302+
return false;
289303
}
290304
}
291305

@@ -296,10 +310,14 @@ ex is System.IO.DirectoryNotFoundException ||
296310

297311
private IEnumerable<Measurement<double>> GetCpuTime()
298312
{
299-
long hostCpuTime = _parser.GetHostCpuUsageInNanoseconds();
300-
double cgroupCpuTime = CpuUtilizationWithoutHostDelta();
313+
if (TryGetValueWithRetry(_parser.GetHostCpuUsageInNanoseconds, out long systemCpuTime))
314+
{
315+
yield return new Measurement<double>(systemCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
316+
}
301317

302-
yield return new Measurement<double>(cgroupCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
303-
yield return new Measurement<double>(hostCpuTime / NanosecondsInSecond, [new KeyValuePair<string, object?>("cpu.mode", "system")]);
318+
if (TryGetValueWithRetry(CpuUtilizationV2, out double userCpuTime))
319+
{
320+
yield return new Measurement<double>(userCpuTime, [new KeyValuePair<string, object?>("cpu.mode", "user")]);
321+
}
304322
}
305323
}

test/Libraries/Microsoft.Extensions.Diagnostics.HealthChecks.ResourceUtilization.Tests/ResourceHealthCheckExtensionsTests.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,7 @@ public async Task TestCpuAndMemoryChecks_WithMetrics(
500500
accountingInfoAfter1Ms.TotalUserTime = (long)(utilization * 100);
501501
jobHandleMock.SetupSequence(j => j.GetBasicAccountingInfo())
502502
.Returns(() => initialAccountingInfo) // this is called from the WindowsContainerSnapshotProvider's constructor
503+
.Returns(() => initialAccountingInfo) // this is called from the WindowsContainerSnapshotProvider's GetCpuTime method
503504
.Returns(() => accountingInfoAfter1Ms); // this is called from the WindowsContainerSnapshotProvider's CpuPercentage method
504505

505506
using var meter = new Meter("Microsoft.Extensions.Diagnostics.ResourceMonitoring");

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/AcceptanceTest.cs

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -249,8 +249,6 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
249249
Assert.Equal(0, utilization.CpuUsedPercentage);
250250
Assert.Equal(100, utilization.MemoryUsedPercentage);
251251
Assert.True(double.IsNaN(cpuFromGauge));
252-
Assert.Equal(0.000102312, cpuUserTime);
253-
Assert.Equal(0.8, cpuKernelTime);
254252

255253
// gauge multiplied by 100 because gauges are in range [0, 1], and utilization is in range [0, 100]
256254
Assert.Equal(utilization.MemoryUsedPercentage, memoryFromGauge * 100);
@@ -269,8 +267,6 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
269267
Assert.Equal(1, utilization.CpuUsedPercentage);
270268
Assert.Equal(50, utilization.MemoryUsedPercentage);
271269
Assert.Equal(0.5, cpuLimitFromGauge * 100);
272-
Assert.Equal(0.000112312, cpuUserTime);
273-
Assert.Equal(0.81, cpuKernelTime);
274270
Assert.Equal(utilization.CpuUsedPercentage, cpuRequestFromGauge * 100);
275271
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
276272
Assert.Equal(utilization.CpuUsedPercentage, cpuFromGauge * 100);
@@ -360,8 +356,6 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
360356
Assert.Equal(1, roundedCpuUsedPercentage);
361357
Assert.Equal(50, utilization.MemoryUsedPercentage);
362358
Assert.Equal(0.5, cpuLimitFromGauge * 100);
363-
Assert.Equal(0.000112, cpuUserTime);
364-
Assert.Equal(0.81, cpuKernelTime);
365359
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuRequestFromGauge * 100));
366360
Assert.Equal(utilization.MemoryUsedPercentage, memoryLimitFromGauge * 100);
367361
Assert.Equal(roundedCpuUsedPercentage, Math.Round(cpuFromGauge * 100));
@@ -373,7 +367,7 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
373367
[ConditionalFact]
374368
[CombinatorialData]
375369
[OSSkipCondition(OperatingSystems.Windows | OperatingSystems.MacOSX, SkipReason = "Linux specific tests")]
376-
public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgroupsv2_v2_Using_NrPeriods()
370+
public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgroupsv2_Using_LinuxCalculationV2()
377371
{
378372
var fileSystem = new HardcodedValueFileSystem(new Dictionary<FileInfo, string>
379373
{
@@ -396,7 +390,6 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
396390
var cpuRequestFromGauge = 0.0d;
397391
var memoryFromGauge = 0.0d;
398392
var memoryLimitFromGauge = 0.0d;
399-
using var e = new ManualResetEventSlim();
400393

401394
object? meterScope = null;
402395
listener.InstrumentPublished = (Instrument instrument, MeterListener meterListener)
@@ -405,13 +398,12 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
405398
=> OnMeasurementReceived(m, f, tags, ref cpuUserTime, ref cpuKernelTime, ref cpuFromGauge, ref cpuLimitFromGauge, ref cpuRequestFromGauge, ref memoryFromGauge, ref memoryLimitFromGauge));
406399
listener.Start();
407400

408-
using var host = FakeHost.CreateBuilder()
401+
using IHost host = FakeHost.CreateBuilder()
409402
.ConfigureServices(x =>
410403
x.AddLogging()
411404
.AddSingleton<TimeProvider>(clock)
412405
.AddSingleton<IUserHz>(new FakeUserHz(100))
413406
.AddSingleton<IFileSystem>(fileSystem)
414-
.AddSingleton<IResourceUtilizationPublisher>(new GenericPublisher(_ => e.Set()))
415407
.AddResourceMonitoring(x => x.ConfigureMonitor(options =>
416408
{
417409
options.UseLinuxCalculationV2 = true;
@@ -420,15 +412,11 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
420412
.Build();
421413

422414
meterScope = host.Services.GetRequiredService<IMeterFactory>();
423-
var tracker = host.Services.GetService<IResourceMonitor>();
424-
Assert.NotNull(tracker);
425415

426416
_ = host.RunAsync();
427417

428418
listener.RecordObservableInstruments();
429419

430-
var utilization = tracker.GetUtilization(TimeSpan.FromSeconds(5));
431-
432420
fileSystem.ReplaceFileContent(new FileInfo("/proc/stat"), "cpu 11 10 10 10 10 10 10 10 10 10");
433421
fileSystem.ReplaceFileContent(new FileInfo("/sys/fs/cgroup/fakeslice/cpu.stat"), "usage_usec 1120000\nnr_periods 56");
434422
fileSystem.ReplaceFileContent(new FileInfo("/sys/fs/cgroup/memory.current"), "524298");
@@ -437,14 +425,10 @@ public Task ResourceUtilizationTracker_And_Metrics_Report_Same_Values_With_Cgrou
437425
clock.Advance(TimeSpan.FromSeconds(6));
438426
listener.RecordObservableInstruments();
439427

440-
e.Wait();
441-
442-
utilization = tracker.GetUtilization(TimeSpan.FromSeconds(5));
443-
444-
var roundedCpuUsedPercentage = Math.Round(utilization.CpuUsedPercentage, 1);
445-
446428
Assert.Equal(42, Math.Round(cpuLimitFromGauge * 100));
447429
Assert.Equal(83, Math.Round(cpuRequestFromGauge * 100));
430+
Assert.Equal(167, Math.Round(cpuUserTime * 100));
431+
Assert.Equal(81, Math.Round(cpuKernelTime * 100));
448432

449433
return Task.CompletedTask;
450434
}

test/Libraries/Microsoft.Extensions.Diagnostics.ResourceMonitoring.Tests/Linux/LinuxUtilizationProviderTests.cs

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ public void Provider_Registers_Instruments()
7575
listener.Start();
7676
listener.RecordObservableInstruments();
7777

78-
Assert.Equal(7, samples.Count);
78+
Assert.Equal(5, samples.Count);
7979

8080
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
8181
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -91,9 +91,6 @@ public void Provider_Registers_Instruments()
9191

9292
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
9393
Assert.Equal(0.5, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
94-
95-
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (50.0 / 1_000_000_000)) < 0.00001);
96-
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
9794
}
9895

9996
[ConditionalFact]
@@ -147,7 +144,7 @@ public void Provider_Registers_Instruments_CgroupV2()
147144
listener.Start();
148145
listener.RecordObservableInstruments();
149146

150-
Assert.Equal(7, samples.Count);
147+
Assert.Equal(5, samples.Count);
151148

152149
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
153150
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
@@ -163,9 +160,6 @@ public void Provider_Registers_Instruments_CgroupV2()
163160

164161
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization);
165162
Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ProcessMemoryUtilization).value);
166-
167-
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - (102312.0 / 1_000_000)) < 0.00001);
168-
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime && Math.Abs(x.value - 0.8) < 0.00001);
169163
}
170164

171165
[Fact]
@@ -265,14 +259,17 @@ public void Provider_Registers_Instruments_CgroupV2_WithoutHostCpu()
265259
listener.Start();
266260
listener.RecordObservableInstruments();
267261

268-
Assert.Equal(4, samples.Count);
262+
Assert.Equal(6, samples.Count);
269263

270264
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization);
271265
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuLimitUtilization).value));
272266

273267
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuRequestUtilization);
274268
Assert.True(double.IsNaN(samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuRequestUtilization).value));
275269

270+
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime);
271+
Assert.All(samples.Where(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerCpuTime), item => double.IsNaN(item.value));
272+
276273
Assert.Contains(samples, x => x.instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization);
277274
Assert.Equal(1, samples.Single(i => i.instrument.Name == ResourceUtilizationInstruments.ContainerMemoryLimitUtilization).value);
278275

0 commit comments

Comments
 (0)