Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Perf] Regressions in System.IO.Tests.Perf_FileStream #51256

Closed
DrewScoggins opened this issue Apr 14, 2021 · 2 comments
Closed

[Perf] Regressions in System.IO.Tests.Perf_FileStream #51256

DrewScoggins opened this issue Apr 14, 2021 · 2 comments
Assignees
Labels
area-System.IO tenet-performance Performance related issue
Milestone

Comments

@DrewScoggins
Copy link
Member

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline a06eccb3600b702d911bcdf8fb0caa50970858d9
Compare 98038888bfb4215a44002b2046eae9ff166087f5
Diff Diff

Regressions in System.IO.Tests.Perf_FileStream

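| Benchmark | Baseline | Test | Test/Base | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| FlushAsync | 2.62 ms | 3.67 ms | 1.40 | | | | Trace | Trace |
| ReadAsync | 43.11 μs | 55.59 μs | 1.29 | | | | Trace | Trace |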

graph
graph
Historical Data in Reporting System

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f netcoreapp5.0 --filter 'System.IO.Tests.Perf_FileStream*'

Payloads

Baseline
Compare

Histogram

System.IO.Tests.Perf_FileStream.FlushAsync(fileSize: 1024, options: None)


Baseline Jit Disasm

; System.IO.Tests.Perf_FileStream.FlushAsync(Int64, System.IO.FileOptions)
       sub       rsp,58
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+20],xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       xor       eax,eax
       mov       [rsp+50],rax
       xor       eax,eax
       mov       [rsp+48],rax
       mov       [rsp+20],rcx
       mov       [rsp+30],rdx
       mov       [rsp+3C],r8d
       mov       dword ptr [rsp+38],0FFFFFFFF
       lea       rcx,[rsp+20]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       mov       rax,[rsp+48]
       test      rax,rax
       je        short M00_L01
M00_L00:
       add       rsp,58
       ret
M00_L01:
       lea       rcx,[rsp+48]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder.InitializeTaskAsPromise()
       jmp       short M00_L00
; Total bytes of code 100
; System.Threading.Tasks.Task.GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter.GetResult()
       mov       rcx,[rcx]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       jmp       near ptr System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       ret
; Total bytes of code 24
; System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       push      rbp
       push      rsi
       sub       rsp,48
       lea       rbp,[rsp+50]
       mov       [rbp+0FFD0],rsp
       mov       rsi,rcx
       cmp       [rsi],esi
       mov       rcx,7FF999D34540
       mov       edx,281
       call      CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE
       mov       rax,[rax+20]
       test      rax,rax
       jne       short M03_L00
       call      System.Threading.Thread.InitializeCurrentThread()
M03_L00:
       mov       [rbp+0FFF0],rax
       mov       rdx,[rax+8]
       mov       [rbp+0FFE8],rdx
       mov       rax,[rax+10]
       mov       [rbp+0FFE0],rax
       mov       rcx,rsi
       call      System.IO.Tests.Perf_FileStream+<FlushAsync>d__36.MoveNext()
       nop
       mov       rax,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rax,[rcx+10]
       je        short M03_L01
       lea       rcx,[rcx+10]
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L01:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L02
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L02:
       nop
       lea       rsp,[rbp+0FFF8]
       pop       rsi
       pop       rbp
       ret
       push      rbp
       push      rsi
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       mov       rdx,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rdx,[rcx+10]
       je        short M03_L03
       lea       rcx,[rcx+10]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L03:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L04
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L04:
       nop
       add       rsp,28
       pop       rsi
       pop       rbp
       ret
; Total bytes of code 211

Compare Jit Disasm

; System.IO.Tests.Perf_FileStream.FlushAsync(Int64, System.IO.FileOptions)
       sub       rsp,58
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+20],xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       xor       eax,eax
       mov       [rsp+50],rax
       xor       eax,eax
       mov       [rsp+48],rax
       mov       [rsp+20],rcx
       mov       [rsp+30],rdx
       mov       [rsp+3C],r8d
       mov       dword ptr [rsp+38],0FFFFFFFF
       lea       rcx,[rsp+20]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       mov       rax,[rsp+48]
       test      rax,rax
       je        short M00_L01
M00_L00:
       add       rsp,58
       ret
M00_L01:
       lea       rcx,[rsp+48]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder.InitializeTaskAsPromise()
       jmp       short M00_L00
; Total bytes of code 100
; System.Threading.Tasks.Task.GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter.GetResult()
       mov       rcx,[rcx]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       jmp       near ptr System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       ret
; Total bytes of code 24
; System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       push      rbp
       push      rsi
       sub       rsp,48
       lea       rbp,[rsp+50]
       mov       [rbp+0FFD0],rsp
       mov       rsi,rcx
       cmp       [rsi],esi
       mov       rcx,7FFF11044550
       mov       edx,281
       call      CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE
       mov       rax,[rax+20]
       test      rax,rax
       jne       short M03_L00
       call      System.Threading.Thread.InitializeCurrentThread()
M03_L00:
       mov       [rbp+0FFF0],rax
       mov       rdx,[rax+8]
       mov       [rbp+0FFE8],rdx
       mov       rax,[rax+10]
       mov       [rbp+0FFE0],rax
       mov       rcx,rsi
       call      System.IO.Tests.Perf_FileStream+<FlushAsync>d__36.MoveNext()
       nop
       mov       rax,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rax,[rcx+10]
       je        short M03_L01
       lea       rcx,[rcx+10]
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L01:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L02
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L02:
       nop
       lea       rsp,[rbp+0FFF8]
       pop       rsi
       pop       rbp
       ret
       push      rbp
       push      rsi
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       mov       rdx,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rdx,[rcx+10]
       je        short M03_L03
       lea       rcx,[rcx+10]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L03:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L04
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L04:
       nop
       add       rsp,28
       pop       rsi
       pop       rbp
       ret
; Total bytes of code 211
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

System.IO.Tests.Perf_FileStream.ReadAsync(fileSize: 1024, userBufferSize: 1024, options: Asynchronous)


Baseline Jit Disasm

; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions)
       sub       rsp,28
       mov       dword ptr [rsp+20],1000
       call      System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 23
; System.Threading.Tasks.Task`1[[System.Int64, System.Private.CoreLib]].GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter`1[[System.Int64, System.Private.CoreLib]].GetResult()
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       rcx,[rsi]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       call      System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       mov       rax,[rsi]
       mov       rax,[rax+38]
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 44
; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       sub       rsp,88
       xor       eax,eax
       mov       [rsp+28],rax
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       vmovdqa   xmmword ptr [rsp+50],xmm4
       vmovdqa   xmmword ptr [rsp+60],xmm4
       vmovdqa   xmmword ptr [rsp+70],xmm4
       mov       [rsp+80],rax
       xor       eax,eax
       mov       [rsp+58],rax
       mov       [rsp+28],rcx
       mov       [rsp+38],rdx
       mov       [rsp+4C],r8d
       mov       [rsp+54],r9d
       mov       ecx,[rsp+0B0]
       mov       [rsp+50],ecx
       mov       dword ptr [rsp+48],0FFFFFFFF
       lea       rcx,[rsp+28]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<ReadAsync>d__31, MicroBenchmarks]](<ReadAsync>d__31 ByRef)
       mov       rax,[rsp+58]
       test      rax,rax
       jne       short M03_L00
       lea       rcx,[rsp+58]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[[System.Int64, System.Private.CoreLib]].InitializeTaskAsPromise()
M03_L00:
       nop
       add       rsp,88
       ret
; Total bytes of code 141
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

Compare Jit Disasm

; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions)
       sub       rsp,28
       mov       dword ptr [rsp+20],1000
       call      System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 23
; System.Threading.Tasks.Task`1[[System.Int64, System.Private.CoreLib]].GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter`1[[System.Int64, System.Private.CoreLib]].GetResult()
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       rcx,[rsi]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       call      System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       mov       rax,[rsi]
       mov       rax,[rax+38]
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 44
; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       sub       rsp,88
       xor       eax,eax
       mov       [rsp+28],rax
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       vmovdqa   xmmword ptr [rsp+50],xmm4
       vmovdqa   xmmword ptr [rsp+60],xmm4
       vmovdqa   xmmword ptr [rsp+70],xmm4
       mov       [rsp+80],rax
       xor       eax,eax
       mov       [rsp+58],rax
       mov       [rsp+28],rcx
       mov       [rsp+38],rdx
       mov       [rsp+4C],r8d
       mov       [rsp+54],r9d
       mov       ecx,[rsp+0B0]
       mov       [rsp+50],ecx
       mov       dword ptr [rsp+48],0FFFFFFFF
       lea       rcx,[rsp+28]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<ReadAsync>d__31, MicroBenchmarks]](<ReadAsync>d__31 ByRef)
       mov       rax,[rsp+58]
       test      rax,rax
       jne       short M03_L00
       lea       rcx,[rsp+58]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[[System.Int64, System.Private.CoreLib]].InitializeTaskAsPromise()
M03_L00:
       nop
       add       rsp,88
       ret
; Total bytes of code 141
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

@DrewScoggins DrewScoggins added tenet-performance Performance related issue tenet-performance-benchmarks Issue from performance benchmark labels Apr 14, 2021
@dotnet-issue-labeler dotnet-issue-labeler bot added area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI untriaged New issue has not been triaged by the area owner labels Apr 14, 2021
@JulieLeeMSFT JulieLeeMSFT added needs-further-triage Issue has been initially triaged, but needs deeper consideration or reconsideration and removed untriaged New issue has not been triaged by the area owner labels Apr 15, 2021
@adamsitnik adamsitnik added area-System.IO and removed area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI labels Apr 15, 2021
@ghost
Copy link

ghost commented Apr 15, 2021

Tagging subscribers to this area: @carlossanlop
See info in area-owners.md if you want to be subscribed.

Issue Details

Run Information

Architecture x64
OS Windows 10.0.18362
Baseline a06eccb3600b702d911bcdf8fb0caa50970858d9
Compare 98038888bfb4215a44002b2046eae9ff166087f5
Diff Diff

Regressions in System.IO.Tests.Perf_FileStream

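| Benchmark | Baseline | Test | Test/Base | Baseline IR | Compare IR | IR Ratio | Baseline ETL | Compare ETL |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| FlushAsync | 2.62 ms | 3.67 ms | 1.40 | | | | Trace | Trace |
| ReadAsync | 43.11 μs | 55.59 μs | 1.29 | | | | Trace | Trace |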

graph
graph
Historical Data in Reporting System

Repro

git clone https://github.com/dotnet/performance.git
py .\performance\scripts\benchmarks_ci.py -f netcoreapp5.0 --filter 'System.IO.Tests.Perf_FileStream*'

Payloads

Baseline
Compare

Histogram

System.IO.Tests.Perf_FileStream.FlushAsync(fileSize: 1024, options: None)


Baseline Jit Disasm

; System.IO.Tests.Perf_FileStream.FlushAsync(Int64, System.IO.FileOptions)
       sub       rsp,58
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+20],xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       xor       eax,eax
       mov       [rsp+50],rax
       xor       eax,eax
       mov       [rsp+48],rax
       mov       [rsp+20],rcx
       mov       [rsp+30],rdx
       mov       [rsp+3C],r8d
       mov       dword ptr [rsp+38],0FFFFFFFF
       lea       rcx,[rsp+20]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       mov       rax,[rsp+48]
       test      rax,rax
       je        short M00_L01
M00_L00:
       add       rsp,58
       ret
M00_L01:
       lea       rcx,[rsp+48]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder.InitializeTaskAsPromise()
       jmp       short M00_L00
; Total bytes of code 100
; System.Threading.Tasks.Task.GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter.GetResult()
       mov       rcx,[rcx]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       jmp       near ptr System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       ret
; Total bytes of code 24
; System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       push      rbp
       push      rsi
       sub       rsp,48
       lea       rbp,[rsp+50]
       mov       [rbp+0FFD0],rsp
       mov       rsi,rcx
       cmp       [rsi],esi
       mov       rcx,7FF999D34540
       mov       edx,281
       call      CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE
       mov       rax,[rax+20]
       test      rax,rax
       jne       short M03_L00
       call      System.Threading.Thread.InitializeCurrentThread()
M03_L00:
       mov       [rbp+0FFF0],rax
       mov       rdx,[rax+8]
       mov       [rbp+0FFE8],rdx
       mov       rax,[rax+10]
       mov       [rbp+0FFE0],rax
       mov       rcx,rsi
       call      System.IO.Tests.Perf_FileStream+<FlushAsync>d__36.MoveNext()
       nop
       mov       rax,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rax,[rcx+10]
       je        short M03_L01
       lea       rcx,[rcx+10]
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L01:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L02
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L02:
       nop
       lea       rsp,[rbp+0FFF8]
       pop       rsi
       pop       rbp
       ret
       push      rbp
       push      rsi
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       mov       rdx,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rdx,[rcx+10]
       je        short M03_L03
       lea       rcx,[rcx+10]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L03:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L04
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L04:
       nop
       add       rsp,28
       pop       rsi
       pop       rbp
       ret
; Total bytes of code 211

Compare Jit Disasm

; System.IO.Tests.Perf_FileStream.FlushAsync(Int64, System.IO.FileOptions)
       sub       rsp,58
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+20],xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       xor       eax,eax
       mov       [rsp+50],rax
       xor       eax,eax
       mov       [rsp+48],rax
       mov       [rsp+20],rcx
       mov       [rsp+30],rdx
       mov       [rsp+3C],r8d
       mov       dword ptr [rsp+38],0FFFFFFFF
       lea       rcx,[rsp+20]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       mov       rax,[rsp+48]
       test      rax,rax
       je        short M00_L01
M00_L00:
       add       rsp,58
       ret
M00_L01:
       lea       rcx,[rsp+48]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder.InitializeTaskAsPromise()
       jmp       short M00_L00
; Total bytes of code 100
; System.Threading.Tasks.Task.GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter.GetResult()
       mov       rcx,[rcx]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       jmp       near ptr System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       ret
; Total bytes of code 24
; System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<FlushAsync>d__36, MicroBenchmarks]](<FlushAsync>d__36 ByRef)
       push      rbp
       push      rsi
       sub       rsp,48
       lea       rbp,[rsp+50]
       mov       [rbp+0FFD0],rsp
       mov       rsi,rcx
       cmp       [rsi],esi
       mov       rcx,7FFF11044550
       mov       edx,281
       call      CORINFO_HELP_GETSHARED_GCTHREADSTATIC_BASE
       mov       rax,[rax+20]
       test      rax,rax
       jne       short M03_L00
       call      System.Threading.Thread.InitializeCurrentThread()
M03_L00:
       mov       [rbp+0FFF0],rax
       mov       rdx,[rax+8]
       mov       [rbp+0FFE8],rdx
       mov       rax,[rax+10]
       mov       [rbp+0FFE0],rax
       mov       rcx,rsi
       call      System.IO.Tests.Perf_FileStream+<FlushAsync>d__36.MoveNext()
       nop
       mov       rax,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rax,[rcx+10]
       je        short M03_L01
       lea       rcx,[rcx+10]
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L01:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L02
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L02:
       nop
       lea       rsp,[rbp+0FFF8]
       pop       rsi
       pop       rbp
       ret
       push      rbp
       push      rsi
       sub       rsp,28
       mov       rbp,[rcx+20]
       mov       [rsp+20],rbp
       lea       rbp,[rbp+50]
       mov       rdx,[rbp+0FFE0]
       mov       rcx,[rbp+0FFF0]
       cmp       rdx,[rcx+10]
       je        short M03_L03
       lea       rcx,[rcx+10]
       call      CORINFO_HELP_ASSIGN_REF
       mov       rcx,[rbp+0FFF0]
M03_L03:
       mov       r8,[rcx+8]
       mov       rdx,[rbp+0FFE8]
       cmp       rdx,r8
       je        short M03_L04
       call      System.Threading.ExecutionContext.RestoreChangedContextToThread(System.Threading.Thread, System.Threading.ExecutionContext, System.Threading.ExecutionContext)
M03_L04:
       nop
       add       rsp,28
       pop       rsi
       pop       rbp
       ret
; Total bytes of code 211
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

System.IO.Tests.Perf_FileStream.ReadAsync(fileSize: 1024, userBufferSize: 1024, options: Asynchronous)


Baseline Jit Disasm

; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions)
       sub       rsp,28
       mov       dword ptr [rsp+20],1000
       call      System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 23
; System.Threading.Tasks.Task`1[[System.Int64, System.Private.CoreLib]].GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter`1[[System.Int64, System.Private.CoreLib]].GetResult()
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       rcx,[rsi]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       call      System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       mov       rax,[rsi]
       mov       rax,[rax+38]
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 44
; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       sub       rsp,88
       xor       eax,eax
       mov       [rsp+28],rax
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       vmovdqa   xmmword ptr [rsp+50],xmm4
       vmovdqa   xmmword ptr [rsp+60],xmm4
       vmovdqa   xmmword ptr [rsp+70],xmm4
       mov       [rsp+80],rax
       xor       eax,eax
       mov       [rsp+58],rax
       mov       [rsp+28],rcx
       mov       [rsp+38],rdx
       mov       [rsp+4C],r8d
       mov       [rsp+54],r9d
       mov       ecx,[rsp+0B0]
       mov       [rsp+50],ecx
       mov       dword ptr [rsp+48],0FFFFFFFF
       lea       rcx,[rsp+28]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<ReadAsync>d__31, MicroBenchmarks]](<ReadAsync>d__31 ByRef)
       mov       rax,[rsp+58]
       test      rax,rax
       jne       short M03_L00
       lea       rcx,[rsp+58]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[[System.Int64, System.Private.CoreLib]].InitializeTaskAsPromise()
M03_L00:
       nop
       add       rsp,88
       ret
; Total bytes of code 141
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

Compare Jit Disasm

; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions)
       sub       rsp,28
       mov       dword ptr [rsp+20],1000
       call      System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       nop
       add       rsp,28
       ret
; Total bytes of code 23
; System.Threading.Tasks.Task`1[[System.Int64, System.Private.CoreLib]].GetAwaiter()
       mov       rax,rcx
       ret
; Total bytes of code 4
; System.Runtime.CompilerServices.TaskAwaiter`1[[System.Int64, System.Private.CoreLib]].GetResult()
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       rcx,[rsi]
       mov       eax,[rcx+34]
       and       eax,11000000
       cmp       eax,1000000
       je        short M02_L00
       call      System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
M02_L00:
       mov       rax,[rsi]
       mov       rax,[rax+38]
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 44
; System.IO.Tests.Perf_FileStream.ReadAsync(Int64, Int32, System.IO.FileOptions, Int32)
       sub       rsp,88
       xor       eax,eax
       mov       [rsp+28],rax
       vxorps    xmm4,xmm4,xmm4
       vmovdqa   xmmword ptr [rsp+30],xmm4
       vmovdqa   xmmword ptr [rsp+40],xmm4
       vmovdqa   xmmword ptr [rsp+50],xmm4
       vmovdqa   xmmword ptr [rsp+60],xmm4
       vmovdqa   xmmword ptr [rsp+70],xmm4
       mov       [rsp+80],rax
       xor       eax,eax
       mov       [rsp+58],rax
       mov       [rsp+28],rcx
       mov       [rsp+38],rdx
       mov       [rsp+4C],r8d
       mov       [rsp+54],r9d
       mov       ecx,[rsp+0B0]
       mov       [rsp+50],ecx
       mov       dword ptr [rsp+48],0FFFFFFFF
       lea       rcx,[rsp+28]
       call      System.Runtime.CompilerServices.AsyncMethodBuilderCore.Start[[System.IO.Tests.Perf_FileStream+<ReadAsync>d__31, MicroBenchmarks]](<ReadAsync>d__31 ByRef)
       mov       rax,[rsp+58]
       test      rax,rax
       jne       short M03_L00
       lea       rcx,[rsp+58]
       call      System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1[[System.Int64, System.Private.CoreLib]].InitializeTaskAsPromise()
M03_L00:
       nop
       add       rsp,88
       ret
; Total bytes of code 141
; System.Runtime.CompilerServices.TaskAwaiter.HandleNonSuccessAndDebuggerNotification(System.Threading.Tasks.Task)
       push      rsi
       sub       rsp,20
       mov       rsi,rcx
       mov       ecx,[rsi+34]
       test      ecx,1600000
       jne       short M04_L00
       mov       rcx,rsi
       xor       r8d,r8d
       mov       edx,0FFFFFFFF
       call      System.Threading.Tasks.Task.InternalWaitCore(Int32, System.Threading.CancellationToken)
M04_L00:
       mov       rcx,rsi
       call      System.Threading.Tasks.Task.NotifyDebuggerOfWaitCompletionIfNecessary()
       mov       ecx,[rsi+34]
       and       ecx,1600000
       cmp       ecx,1000000
       je        short M04_L01
       mov       rcx,rsi
       call      System.Runtime.CompilerServices.TaskAwaiter.ThrowForNonSuccess(System.Threading.Tasks.Task)
M04_L01:
       nop
       add       rsp,20
       pop       rsi
       ret
; Total bytes of code 75

Docs

Profiling workflow for dotnet/runtime repository
Benchmarking workflow for dotnet/runtime repository

Author: DrewScoggins
Assignees: -
Labels:

area-System.IO, needs-further-triage, tenet-performance, tenet-performance-benchmarks

Milestone: -

@adamsitnik adamsitnik removed the needs-further-triage Issue has been initially triaged, but needs deeper consideration or reconsideration label Apr 15, 2021
@adamsitnik
Copy link
Member

This is by design: #48813 made FlushAsync and ReadAsync fully asynchronous; previously they were not (see #27643 and #16341).

@adamsitnik adamsitnik self-assigned this Apr 15, 2021
@adamsitnik adamsitnik added this to the 6.0.0 milestone Apr 15, 2021
@adamsitnik adamsitnik removed the tenet-performance-benchmarks Issue from performance benchmark label Apr 15, 2021
@ghost ghost locked as resolved and limited conversation to collaborators May 15, 2021
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-System.IO tenet-performance Performance related issue
Projects
None yet
Development

No branches or pull requests

4 participants