Skip to content

[JIT] Performance bug Windows vs Linux #71311

Closed
@mdenhoedt

Description

@mdenhoedt

Description

The Triplet.Plus method has different performance behaviour on Windows and Ubuntu. I rewrote this method using an out parameter, which made it a bit less performant on Windows and way more performant on Ubuntu. Obviously, I would like to avoid the variant with the out parameter, because it results in less readable code.

The JIT also emitted different assembly on the two platforms. Below is the code of the simplest repro I could make, which is a synthetic benchmark. I made a similar change in my ASP.NET Core 6.0 application and reached similar conclusions.

I consider this a performance bug. Please let me know if I can provide further assistance.

using System.Runtime.InteropServices;

namespace MyNamespace;

/// <summary>
/// Immutable value type holding three <see cref="int"/> components
/// (<see cref="A"/>, <see cref="B"/>, <see cref="C"/>), laid out sequentially
/// with a packing of 1.
/// </summary>
[StructLayout(LayoutKind.Sequential, Pack = 1)]
public readonly struct Triplet
{
    /// <summary>First component.</summary>
    public int A { get; }
    /// <summary>Second component.</summary>
    public int B { get; }
    /// <summary>Third component.</summary>
    public int C { get; }

    /// <summary>Creates a triplet from its three components.</summary>
    /// <param name="a">Value stored in <see cref="A"/>.</param>
    /// <param name="b">Value stored in <see cref="B"/>.</param>
    /// <param name="c">Value stored in <see cref="C"/>.</param>
    public Triplet(int a, int b, int c)
    {
        A = a;
        B = b;
        C = c;
    }

    /// <summary>Triplet whose three components are all <see cref="int.MaxValue"/>.</summary>
    public static readonly Triplet Max = new Triplet(int.MaxValue, int.MaxValue, int.MaxValue);

    /// <summary>
    /// <c>true</c> when <see cref="A"/> equals <see cref="int.MaxValue"/>.
    /// </summary>
    // NOTE(review): only A is inspected; B and C are not compared against Max — confirm this is intended.
    public bool IsMax => A == int.MaxValue;

    /// <summary>
    /// Adds two triplets component-wise and returns the sum by value.
    /// </summary>
    /// <param name="left">First operand, passed by read-only reference.</param>
    /// <param name="right">Second operand, passed by read-only reference.</param>
    /// <returns>
    /// <see cref="Max"/> if either operand reports <see cref="IsMax"/>; otherwise a new
    /// triplet whose components are the pairwise sums.
    /// </returns>
    /// <exception cref="System.OverflowException">
    /// A component sum overflows <see cref="int"/> (the additions run in a <c>checked</c> block).
    /// </exception>
    public static Triplet Plus(in Triplet left, in Triplet right)
    {
        // Short-circuit: a "max" operand yields Max without performing any addition.
        if (left.IsMax || right.IsMax)
        {
            return Max;
        }

        checked
        {
            int a = left.A + right.A;
            int b = left.B + right.B;
            int c = left.C + right.C;

            return new Triplet(a, b, c);
        }
    }

    /// <summary>
    /// Same computation as <see cref="Plus"/>, but the sum is written to an
    /// <c>out</c> parameter instead of being returned by value.
    /// </summary>
    /// <param name="left">First operand, passed by read-only reference.</param>
    /// <param name="right">Second operand, passed by read-only reference.</param>
    /// <param name="result">
    /// Receives <see cref="Max"/> if either operand reports <see cref="IsMax"/>; otherwise
    /// a new triplet whose components are the pairwise sums.
    /// </param>
    /// <exception cref="System.OverflowException">
    /// A component sum overflows <see cref="int"/> (the additions run in a <c>checked</c> block).
    /// </exception>
    public static void PlusOut(in Triplet left, in Triplet right, out Triplet result)
    {

        // Short-circuit: a "max" operand yields Max without performing any addition.
        if (left.IsMax || right.IsMax)
        {
            result = Max;
            return;
        }

        checked
        {
            int a = left.A + right.A;
            int b = left.B + right.B;
            int c = left.C + right.C;

            result = new Triplet(a, b, c);
        }
    }
}

using BenchmarkDotNet.Attributes;

/// <summary>
/// BenchmarkDotNet benchmark comparing <c>Triplet.Plus</c> (result returned by value)
/// against <c>Triplet.PlusOut</c> (result written through an <c>out</c> parameter).
/// The disassembly diagnoser is enabled on the class.
/// </summary>
[DisassemblyDiagnoser(printSource: true, exportGithubMarkdown: true, exportDiff: true)]
public class TripletBenchmark
{
    // Fixed operands; neither has A == int.MaxValue, so both benchmarks take the addition path.
    private readonly Triplet tripletA = new(100, 100, 0);
    private readonly Triplet tripletB = new(2340, 100, 0);

    /// <summary>
    /// Baseline: adds the two operands via <c>Triplet.Plus</c> and tests the sum's A component.
    /// </summary>
    /// <returns><c>true</c> when the resulting A is below 5000 (100 + 2340 for the fixed operands).</returns>
    [Benchmark(Baseline = true)]
    public bool TripletPlus()
    {
        return Triplet.Plus(tripletA, tripletB).A < 5000;
    }

    /// <summary>
    /// Variant: adds the two operands via <c>Triplet.PlusOut</c> and tests the sum's A component.
    /// </summary>
    /// <returns><c>true</c> when the resulting A is below 5000 (100 + 2340 for the fixed operands).</returns>
    [Benchmark]
    public bool TripletPlusOut()
    {
        Triplet.PlusOut(tripletA, tripletB, out Triplet temp);
        return temp.A < 5000;
    }
}

Configuration

The Windows data was gathered by running on my laptop. The Ubuntu data was gathered on an Azure VM (Standard D4ds v5). For more precise configuration information, consult the 'Data' section.

Regression

I haven't tried any other versions of .NET.

Data

Windows

BenchmarkDotNet=v0.13.1, OS=Windows 10.0.19042.1766 (20H2/October2020Update)
Intel Core i7-7600U CPU 2.80GHz (Kaby Lake), 1 CPU, 4 logical and 2 physical cores
.NET SDK=6.0.301
  [Host]     : .NET 6.0.6 (6.0.622.26707), X64 RyuJIT
  DefaultJob : .NET 6.0.6 (6.0.622.26707), X64 RyuJIT
| Method | Mean | Error | StdDev | Median | Ratio | RatioSD | Code Size |
|--- |--- |--- |--- |--- |--- |--- |--- |
| TripletPlus | 3.110 ns | 0.0711 ns | 0.0631 ns | 3.112 ns | 1.00 | 0.00 | 190 B |
| TripletPlusOut | 3.510 ns | 0.3743 ns | 1.0799 ns | 3.077 ns | 1.18 | 0.34 | 191 B |

Ubuntu

BenchmarkDotNet=v0.13.1, OS=ubuntu 20.04
Intel Xeon Platinum 8370C CPU 2.80GHz, 1 CPU, 4 logical and 2 physical cores
.NET SDK=6.0.301
  [Host]     : .NET 6.0.6 (6.0.622.26707), X64 RyuJIT
  DefaultJob : .NET 6.0.6 (6.0.622.26707), X64 RyuJIT
| Method | Mean | Error | StdDev | Ratio | Code Size |
|--- |--- |--- |--- |--- |--- |
| TripletPlus | 8.537 ns | 0.0032 ns | 0.0028 ns | 1.00 | 214 B |
| TripletPlusOut | 2.315 ns | 0.0036 ns | 0.0033 ns | 0.27 | 206 B |

Analysis

Windows

.NET 6.0.6 (6.0.622.26707), X64 RyuJIT

; MyNamespace.TripletBenchmark.TripletPlus()
       sub       rsp,38
       xor       eax,eax
       mov       [rsp+28],rax
       mov       [rsp+30],rax
       cmp       [rcx],ecx
       lea       rdx,[rcx+8]
       lea       r8,[rcx+18]
       lea       rcx,[rsp+28]
       call      MyNamespace.Triplet.Plus(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       cmp       dword ptr [rsp+28],1388
       setl      al
       movzx     eax,al
       add       rsp,38
       ret
; Total bytes of code 55
; MyNamespace.Triplet.Plus(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,20
       mov       rsi,rcx
       mov       edi,[rdx]
       cmp       edi,7FFFFFFF
       je        short M01_L00
       mov       ebx,[r8]
       cmp       ebx,7FFFFFFF
       jne       short M01_L01
M01_L00:
       mov       rcx,7FF7C7934FC8
       mov       edx,16
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       mov       rax,1F033C37350
       mov       rax,[rax]
       mov       rdx,[rax+8]
       mov       [rsi],rdx
       mov       edx,[rax+10]
       mov       [rsi+8],edx
       mov       rax,rsi
       add       rsp,20
       pop       rbx
       pop       rsi
       pop       rdi
       ret
M01_L01:
       add       edi,ebx
       jo        short M01_L02
       mov       eax,[rdx+4]
       add       eax,[r8+4]
       jo        short M01_L02
       mov       edx,[rdx+8]
       add       edx,[r8+8]
       jo        short M01_L02
       mov       [rsi],edi
       mov       [rsi+4],eax
       mov       [rsi+8],edx
       mov       rax,rsi
       add       rsp,20
       pop       rbx
       pop       rsi
       pop       rdi
       ret
M01_L02:
       call      CORINFO_HELP_OVERFLOW
       int       3
; Total bytes of code 135

.NET 6.0.6 (6.0.622.26707), X64 RyuJIT

; MyNamespace.TripletBenchmark.TripletPlusOut()
       sub       rsp,38
       xor       eax,eax
       mov       [rsp+28],rax
       mov       [rsp+30],rax
       cmp       [rcx],ecx
       mov       [rsp+40],rcx
       add       rcx,8
       mov       rdx,[rsp+40]
       add       rdx,18
       lea       r8,[rsp+28]
       call      MyNamespace.Triplet.PlusOut(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       cmp       dword ptr [rsp+28],1388
       setl      al
       movzx     eax,al
       add       rsp,38
       ret
; Total bytes of code 65
; MyNamespace.Triplet.PlusOut(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,20
       mov       rsi,r8
       mov       edi,[rcx]
       cmp       edi,7FFFFFFF
       je        short M01_L00
       mov       ebx,[rdx]
       cmp       ebx,7FFFFFFF
       jne       short M01_L01
M01_L00:
       mov       rcx,7FF7C7914FC8
       mov       edx,16
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       mov       rax,16818E97350
       mov       rax,[rax]
       mov       rdx,[rax+8]
       mov       [rsi],rdx
       mov       edx,[rax+10]
       mov       [rsi+8],edx
       add       rsp,20
       pop       rbx
       pop       rsi
       pop       rdi
       ret
M01_L01:
       add       edi,ebx
       jo        short M01_L02
       mov       eax,[rcx+4]
       add       eax,[rdx+4]
       jo        short M01_L02
       mov       ecx,[rcx+8]
       add       ecx,[rdx+8]
       jo        short M01_L02
       mov       [rsi],edi
       mov       [rsi+4],eax
       mov       [rsi+8],ecx
       add       rsp,20
       pop       rbx
       pop       rsi
       pop       rdi
       ret
M01_L02:
       call      CORINFO_HELP_OVERFLOW
       int       3
; Total bytes of code 126

Ubuntu

.NET 6.0.6 (6.0.622.26707), X64 RyuJIT

; MyNamespace.TripletBenchmark.TripletPlus()
       sub       rsp,18
       cmp       [rdi],edi
       mov       [rsp],rdi
       add       rdi,8
       mov       rsi,[rsp]
       add       rsi,18
       call      MyNamespace.Triplet.Plus(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       mov       [rsp+8],rax
       mov       [rsp+10],edx
       cmp       dword ptr [rsp+8],1388
       setl      al
       movzx     eax,al
       add       rsp,18
       ret
; Total bytes of code 55
; MyNamespace.Triplet.Plus(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       push      rbp
       push      r14
       push      rbx
       sub       rsp,20
       lea       rbp,[rsp+30]
       mov       ebx,[rdi]
       cmp       ebx,7FFFFFFF
       je        short M01_L00
       mov       r14d,[rsi]
       cmp       r14d,7FFFFFFF
       jne       short M01_L01
M01_L00:
       mov       rdi,7F945206D5E0
       mov       esi,16
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       mov       rax,7F94340053D0
       mov       rax,[rax]
       mov       rdi,[rax+8]
       mov       [rbp-20],rdi
       mov       edi,[rax+10]
       mov       [rbp-18],edi
       mov       rax,[rbp-20]
       mov       edx,[rbp-18]
       add       rsp,20
       pop       rbx
       pop       r14
       pop       rbp
       ret
M01_L01:
       add       ebx,r14d
       jo        short M01_L02
       mov       eax,[rdi+4]
       add       eax,[rsi+4]
       jo        short M01_L02
       mov       edi,[rdi+8]
       add       edi,[rsi+8]
       jo        short M01_L02
       xor       esi,esi
       mov       [rbp-30],rsi
       mov       [rbp-28],esi
       mov       [rbp-30],ebx
       mov       [rbp-2C],eax
       mov       [rbp-28],edi
       mov       rax,[rbp-30]
       mov       edx,[rbp-28]
       add       rsp,20
       pop       rbx
       pop       r14
       pop       rbp
       ret
M01_L02:
       call      CORINFO_HELP_OVERFLOW
       int       3
; Total bytes of code 159

.NET 6.0.6 (6.0.622.26707), X64 RyuJIT

; MyNamespace.TripletBenchmark.TripletPlusOut()
       sub       rsp,18
       xor       eax,eax
       mov       [rsp+8],rax
       mov       [rsp+10],rax
       cmp       [rdi],edi
       mov       [rsp],rdi
       add       rdi,8
       mov       rsi,[rsp]
       add       rsi,18
       lea       rdx,[rsp+8]
       call      MyNamespace.Triplet.PlusOut(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       cmp       dword ptr [rsp+8],1388
       setl      al
       movzx     eax,al
       add       rsp,18
       ret
; Total bytes of code 63
; MyNamespace.Triplet.PlusOut(MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef, MyNamespace.Triplet ByRef)
       push      rbp
       push      r15
       push      r14
       push      rbx
       push      rax
       lea       rbp,[rsp+20]
       mov       rbx,rdx
       mov       r14d,[rdi]
       cmp       r14d,7FFFFFFF
       je        short M01_L00
       mov       r15d,[rsi]
       cmp       r15d,7FFFFFFF
       jne       short M01_L01
M01_L00:
       mov       rdi,7FBF90B9D5E0
       mov       esi,16
       call      CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE
       mov       rax,7FBF700053D0
       mov       rax,[rax]
       mov       rdi,[rax+8]
       mov       [rbx],rdi
       mov       edi,[rax+10]
       mov       [rbx+8],edi
       add       rsp,8
       pop       rbx
       pop       r14
       pop       r15
       pop       rbp
       ret
M01_L01:
       add       r14d,r15d
       jo        short M01_L02
       mov       eax,[rdi+4]
       add       eax,[rsi+4]
       jo        short M01_L02
       mov       edi,[rdi+8]
       add       edi,[rsi+8]
       jo        short M01_L02
       mov       [rbx],r14d
       mov       [rbx+4],eax
       mov       [rbx+8],edi
       add       rsp,8
       pop       rbx
       pop       r14
       pop       r15
       pop       rbp
       ret
M01_L02:
       call      CORINFO_HELP_OVERFLOW
       int       3
; Total bytes of code 143

Metadata

Metadata

Assignees

No one assigned

    Labels

    area-CodeGen-coreclr (CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI), tenet-performance (Performance related issue)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions