-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
JIT: Bad codegen on win-x86 #90219
Labels
area-CodeGen-coreclr
CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Milestone
Comments
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch Issue Details// Generated by Fuzzlyn v1.5 on 2023-08-05 16:08:41
// Run on X86 Windows
// Seed: 7610693270284065118
// Reduced from 12.0 KiB to 2.2 KiB in 00:01:06
// Debug: Outputs 1
// Release: Outputs 2
public interface I0
{
}
public class C0 : I0
{
public ulong F0;
public uint F4;
public C0(ulong f0, uint f4)
{
F0 = f0;
F4 = f4;
}
}
public class Program
{
public static IRuntime s_rt;
public static I0 s_1;
public static uint s_2;
public static byte[] s_4 = new byte[]{0};
public static void Main()
{
CollectibleALC alc = new CollectibleALC();
System.Reflection.Assembly asm = alc.LoadFromAssemblyPath(System.Reflection.Assembly.GetExecutingAssembly().Location);
System.Reflection.MethodInfo mi = asm.GetType(typeof(Program).FullName).GetMethod(nameof(MainInner));
System.Type runtimeTy = asm.GetType(typeof(Runtime).FullName);
mi.Invoke(null, new object[]{System.Activator.CreateInstance(runtimeTy)});
}
public static void MainInner(IRuntime rt)
{
s_rt = rt;
var vr8 = new C0(0, 0);
var vr12 = s_4[0];
vr8.F0 *= M2(vr12);
long[] vr9 = new long[]{0};
bool vr10 = (int)M2(0) <= vr9[0];
}
public static ulong M2(byte arg0)
{
for (int var0 = 0; var0 < 2; var0++)
{
var vr1 = new C0(0, 0);
var vr3 = new C0(0, 0);
uint vr14 = s_2;
s_rt.WriteLine("c_0", vr14);
uint vr15 = vr3.F4;
s_2 = Program.M3(vr1, vr15);
s_1 = s_1;
s_1 = new C0(0, 0);
s_rt.WriteLine("c_6", var0);
}
var vr5 = new C0(0, 1);
uint vr18 = vr5.F4;
arg0 = (byte)vr18;
var vr7 = new C0(0, 1838341904U);
if ((arg0 >= (byte)M3(vr7, 0)))
{
arg0 <<= 1;
}
s_rt.WriteLine("c_7", arg0);
return 0;
}
public static uint M3(C0 argThis, uint arg0)
{
s_rt.WriteLine("c_0", arg0);
return argThis.F4;
}
}
public interface IRuntime
{
void WriteLine<T>(string site, T value);
}
public class Runtime : IRuntime
{
public void WriteLine<T>(string site, T value) => System.Console.WriteLine(value);
}
public class CollectibleALC : System.Runtime.Loader.AssemblyLoadContext
{
public CollectibleALC(): base(true)
{
}
}
|
This is another case of morph using a subrange assertion to try to optimize away normalization, where the backend then does not satisfy the necessary invariants to make this work correctly. Some annotated codegen: G_M21776_IG12: ;; offset=0x010B
mov dword ptr [ecx+0x04], edx
mov dword ptr [eax+0x0C], 1
movzx ecx, byte ptr [eax+0x0C]
mov byte ptr [ebp-0x10], cl ; arg0 = 0; 1 byte write
mov ecx, 0xAC1D838 ; C0
call CORINFO_HELP_NEWSFAST
lea ecx, bword ptr [eax+0x04]
xor edx, edx
mov dword ptr [ecx], edx
;; size=34 bbWeight=1 PerfScore 8.00
G_M21776_IG13: ;; offset=0x012D
mov dword ptr [ecx+0x04], edx
mov dword ptr [eax+0x0C], 0x6D92DF10
mov ecx, eax
;; size=12 bbWeight=1 PerfScore 2.25
G_M21776_IG14: ;; offset=0x0139
call [Program:M3(C0,uint):uint]
movzx ecx, al
mov eax, dword ptr [ebp-0x10] ; load arg0; 4 byte read
movzx edx, al ; normalize it properly, ok
add edx, edx
movzx edx, dl
cmp ecx, eax ; use of unnormalized arg0; not ok
cmovle eax, edx
mov byte ptr [ebp-0x10], al
mov esi, gword ptr [ebx]
push 0xAC1DBB8
mov ecx, esi
mov edx, 0xAC1D338 ; IRuntime
call CORINFO_HELP_VIRTUAL_FUNC_PTR
movzx edx, byte ptr [ebp-0x10]
push edx
mov edx, gword ptr [0x070F3020] ; string handle
mov ecx, esi
call eax
xor eax, eax
xor edx, edx |
jakobbotsch
added a commit
to jakobbotsch/runtime
that referenced
this issue
Aug 10, 2023
The existing logic would sometimes unspill using the type of the local that is being unspilled. This type is often wider than the exact small type in the LclVarDsc, since NOL locals are normally expanded as CAST(<small type>, LCL_VAR<int>). This causes problems since morph will in some cases avoid inserting normalization for NOL locals when it has a subrange assertion available. This optimization relies on the backend always ensuring that the local will be normalized as part of unspilling and args homing. Size-wise regressions are expected on xarch since the encoding of the normalizing load is larger. However, as we have seen before, using wide loads can cause significant store-forwarding stalls which can have large negative impact on performance, so overall there is an expected perf benefit of using the small loads (in addition to the correctness fix). Fix dotnet#90219
jakobbotsch
added a commit
that referenced
this issue
Aug 14, 2023
The existing logic would sometimes unspill using the type of the local that is being unspilled. This type is often wider than the exact small type in the LclVarDsc, since NOL locals are normally expanded as CAST(<small type>, LCL_VAR<int>). This causes problems since morph will in some cases avoid inserting normalization for NOL locals when it has a subrange assertion available. This optimization relies on the backend always ensuring that the local will be normalized as part of unspilling and args homing. Size-wise regressions are expected on xarch since the encoding of the normalizing load is larger. However, as we have seen before, using wide loads can cause significant store-forwarding stalls which can have large negative impact on performance, so overall there is an expected perf benefit of using the small loads (in addition to the correctness fix). Fix #90219
Sign up for free
to subscribe to this conversation on GitHub.
Already have an account?
Sign in.
Labels
area-CodeGen-coreclr
CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
The text was updated successfully, but these errors were encountered: