@@ -20,6 +20,7 @@ public class Engine : IEngine
2020
2121 [ PublicAPI ] public IHost Host { get ; }
2222 [ PublicAPI ] public Action < long > WorkloadAction { get ; }
23+ [ PublicAPI ] public Action < long > WorkloadActionNoUnroll { get ; }
2324 [ PublicAPI ] public Action Dummy1Action { get ; }
2425 [ PublicAPI ] public Action Dummy2Action { get ; }
2526 [ PublicAPI ] public Action Dummy3Action { get ; }
@@ -45,19 +46,23 @@ public class Engine : IEngine
4546 private readonly EnginePilotStage pilotStage ;
4647 private readonly EngineWarmupStage warmupStage ;
4748 private readonly EngineActualStage actualStage ;
48- private readonly bool includeExtraStats ;
4949 private readonly Random random ;
50+ private readonly bool includeExtraStats , includeSurvivedMemory ;
51+
52+ private long survivedBytes ;
53+ private bool survivedBytesMeasured ;
54+ private static Func < long > GetTotalBytes { get ; set ; }
5055
5156 internal Engine (
5257 IHost host ,
5358 IResolver resolver ,
54- Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Job targetJob ,
59+ Action dummy1Action , Action dummy2Action , Action dummy3Action , Action < long > overheadAction , Action < long > workloadAction , Action < long > workloadActionNoUnroll , Job targetJob ,
5560 Action globalSetupAction , Action globalCleanupAction , Action iterationSetupAction , Action iterationCleanupAction , long operationsPerInvoke ,
56- bool includeExtraStats , string benchmarkName )
61+ bool includeExtraStats , bool includeSurvivedMemory , string benchmarkName )
5762 {
58-
5963 Host = host ;
6064 OverheadAction = overheadAction ;
65+ WorkloadActionNoUnroll = workloadActionNoUnroll ;
6166 Dummy1Action = dummy1Action ;
6267 Dummy2Action = dummy2Action ;
6368 Dummy3Action = dummy3Action ;
@@ -70,6 +75,7 @@ internal Engine(
7075 OperationsPerInvoke = operationsPerInvoke ;
7176 this . includeExtraStats = includeExtraStats ;
7277 BenchmarkName = benchmarkName ;
78+ this . includeSurvivedMemory = includeSurvivedMemory ;
7379
7480 Resolver = resolver ;
7581
@@ -85,6 +91,55 @@ internal Engine(
8591 actualStage = new EngineActualStage ( this ) ;
8692
8793 random = new Random ( 12345 ) ; // we are using constant seed to try to get repeatable results
94+
95+ if ( includeSurvivedMemory && GetTotalBytes is null )
96+ {
97+ // CreateGetTotalBytesFunc enables monitoring, so we only call it if we need to measure survived memory.
98+ GetTotalBytes = CreateGetTotalBytesFunc ( ) ;
99+
100+ // Necessary for CORE runtimes.
101+ // Measure bytes to allow GC monitor to make its allocations.
102+ GetTotalBytes ( ) ;
103+ // Run the clock once to allow it to make its allocations.
104+ MeasureAction ( _ => { } , 0 ) ;
105+ GetTotalBytes ( ) ;
106+ }
107+ }
108+
/// <summary>
/// Creates the delegate used to sample a byte count before and after a workload
/// when survived memory is being measured.
/// </summary>
/// <returns>
/// On Mono, a delegate that always returns 0. Otherwise a delegate that forces a
/// garbage collection and then reads <see cref="AppDomain.MonitoringSurvivedMemorySize"/>;
/// if enabling AppDomain monitoring throws, a delegate that forces a garbage
/// collection and reads <see cref="GC.GetTotalMemory(bool)"/> instead.
/// </returns>
private static Func<long> CreateGetTotalBytesFunc()
{
    // Mono is not measured at all: monitoring is not available there,
    // and GC.GetTotalMemory is very inaccurate.
    if (RuntimeInformation.IsMono)
    {
        return AlwaysZero;
    }

    try
    {
        // The docs say this should be available in .NET Core 2.1, but it throws an exception.
        // So attempt it on every non-Mono runtime and fall back to GC.GetTotalMemory on failure.
        AppDomain.MonitoringIsEnabled = true;
        return ReadSurvivedMemorySize;
    }
    catch
    {
        return ReadTotalMemory;
    }

    long AlwaysZero() => 0;

    long ReadSurvivedMemorySize()
    {
        // A collection is forced first so that the reading is accurate.
        ForceGcCollect();
        return AppDomain.CurrentDomain.MonitoringSurvivedMemorySize;
    }

    long ReadTotalMemory()
    {
        // A collection is forced first so that the reading is accurate.
        ForceGcCollect();
        return GC.GetTotalMemory(true);
    }
}
136+
/// <summary>
/// Copies the survived-memory measurement state from another engine into this one,
/// so that the survived bytes only need to be measured once.
/// </summary>
/// <param name="other">The engine whose survived-bytes value and "measured" flag are copied.</param>
/// <returns>This engine instance, to allow call chaining.</returns>
internal Engine WithInitialData(Engine other)
{
    (survivedBytes, survivedBytesMeasured) = (other.survivedBytes, other.survivedBytesMeasured);
    return this;
}
89144
90145 public void Dispose ( )
@@ -160,7 +215,9 @@ public Measurement RunIteration(IterationData data)
160215 var action = isOverhead ? OverheadAction : WorkloadAction ;
161216
162217 if ( ! isOverhead )
218+ {
163219 IterationSetupAction ( ) ;
220+ }
164221
165222 GcCollect ( ) ;
166223
@@ -169,10 +226,36 @@ public Measurement RunIteration(IterationData data)
169226
170227 Span < byte > stackMemory = randomizeMemory ? stackalloc byte [ random . Next ( 32 ) ] : Span < byte > . Empty ;
171228
172- // Measure
173- var clock = Clock . Start ( ) ;
174- action ( invokeCount / unrollFactor ) ;
175- var clockSpan = clock . GetElapsed ( ) ;
229+ bool needsSurvivedMeasurement = includeSurvivedMemory && ! isOverhead && ! survivedBytesMeasured ;
230+ double nanoseconds ;
231+ if ( needsSurvivedMeasurement )
232+ {
233+ // Measure survived bytes for only the first invocation.
234+ survivedBytesMeasured = true ;
235+ if ( totalOperations == 1 )
236+ {
237+ // Measure normal invocation for both survived memory and time.
238+ long beforeBytes = GetTotalBytes ( ) ;
239+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
240+ long afterBytes = GetTotalBytes ( ) ;
241+ survivedBytes = afterBytes - beforeBytes ;
242+ }
243+ else
244+ {
245+ // Measure a single invocation for survived memory, plus normal invocations for time.
246+ ++ totalOperations ;
247+ long beforeBytes = GetTotalBytes ( ) ;
248+ nanoseconds = MeasureAction ( WorkloadActionNoUnroll , 1 ) ;
249+ long afterBytes = GetTotalBytes ( ) ;
250+ survivedBytes = afterBytes - beforeBytes ;
251+ nanoseconds += MeasureAction ( action , invokeCount / unrollFactor ) ;
252+ }
253+ }
254+ else
255+ {
256+ // Measure time normally.
257+ nanoseconds = MeasureAction ( action , invokeCount / unrollFactor ) ;
258+ }
176259
177260 if ( EngineEventSource . Log . IsEnabled ( ) )
178261 EngineEventSource . Log . IterationStop ( data . IterationMode , data . IterationStage , totalOperations ) ;
@@ -186,7 +269,7 @@ public Measurement RunIteration(IterationData data)
186269 GcCollect ( ) ;
187270
188271 // Results
189- var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , clockSpan . GetNanoseconds ( ) ) ;
272+ var measurement = new Measurement ( 0 , data . IterationMode , data . IterationStage , data . Index , totalOperations , nanoseconds ) ;
190273 WriteLine ( measurement . ToString ( ) ) ;
191274 if ( measurement . IterationStage == IterationStage . Jitting )
192275 jittingMeasurements . Add ( measurement ) ;
@@ -196,6 +279,15 @@ public Measurement RunIteration(IterationData data)
196279 return measurement ;
197280 }
198281
/// <summary>
/// Invokes <paramref name="action"/> once with <paramref name="arg"/> and returns
/// the elapsed time reported by the clock, in nanoseconds.
/// </summary>
/// <param name="action">The delegate to time.</param>
/// <param name="arg">The value passed to <paramref name="action"/>.</param>
/// <returns>The elapsed time of the single invocation, in nanoseconds.</returns>
// Deliberately not inlined: this is necessary for the CORE runtime
// to clean up the memory from the clock.
[MethodImpl(MethodImplOptions.NoInlining)]
private double MeasureAction(Action<long> action, long arg)
{
    var startedClock = Clock.Start();
    action(arg);
    var elapsed = startedClock.GetElapsed();
    return elapsed.GetNanoseconds();
}
290+
199291 private ( GcStats , ThreadingStats , double ) GetExtraStats ( IterationData data )
200292 {
201293 // we enable monitoring after main target run, for this single iteration which is executed at the end
@@ -219,7 +311,7 @@ public Measurement RunIteration(IterationData data)
219311 IterationCleanupAction ( ) ; // we run iteration cleanup after collecting GC stats
220312
221313 var totalOperationsCount = data . InvokeCount * OperationsPerInvoke ;
222- GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperations ( totalOperationsCount ) ;
314+ GcStats gcStats = ( finalGcStats - initialGcStats ) . WithTotalOperationsAndSurvivedBytes ( data . InvokeCount * OperationsPerInvoke , survivedBytes ) ;
223315 ThreadingStats threadingStats = ( finalThreadingStats - initialThreadingStats ) . WithTotalOperations ( data . InvokeCount * OperationsPerInvoke ) ;
224316
225317 return ( gcStats , threadingStats , exceptionsStats . ExceptionsCount / ( double ) totalOperationsCount ) ;
0 commit comments