forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X64] [MichalPetryka] Rewrite math jit helpers to managed code #264
Comments
Top method regressions
7 (2.69 % of base) - System.Math:IEEERemainder(double,double):double
; Assembly listing for method System.Math:IEEERemainder(double,double):double (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 4 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 9, 6.50) double -> [rbp-0x08] single-def
; V01 arg1 [V01,T02] ( 8, 5 ) double -> [rbp-0x10] single-def
; V02 loc0 [V02,T03] ( 7, 3.50) double -> [rbp-0x18] spill-single-def
; V03 loc1 [V03,T04] ( 3, 1.50) double -> mm0
; V04 loc2 [V04,T05] ( 3, 1.50) double -> mm4
;* V05 loc3 [V05 ] ( 0, 0 ) double -> zero-ref
;# V06 OutArgs [V06 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V07 tmp1 [V07,T01] ( 7, 8 ) double -> mm0 "Single return block return value"
; V08 cse0 [V08,T06] ( 3, 1.50) double -> mm3 "CSE - moderate"
; V09 cse1 [V09,T07] ( 3, 1.50) double -> mm4 "CSE - moderate"
;
; Lcl frame size = 32
G_M14330_IG01:
push rbp
sub rsp, 32
lea rbp, [rsp+0x20]
;; size=10 bbWeight=1 PerfScore 1.75
G_M14330_IG02:
vucomisd xmm0, xmm0
jp SHORT G_M14330_IG03
je SHORT G_M14330_IG04
;; size=8 bbWeight=1 PerfScore 4.00
G_M14330_IG03:
jmp G_M14330_IG13
;; size=5 bbWeight=0.50 PerfScore 1.00
G_M14330_IG04:
vucomisd xmm1, xmm1
jp SHORT G_M14330_IG05
je SHORT G_M14330_IG06
;; size=8 bbWeight=0.50 PerfScore 2.00
G_M14330_IG05:
vmovaps xmm0, xmm1
jmp G_M14330_IG13
;; size=9 bbWeight=0.50 PerfScore 1.12
G_M14330_IG06:
vmovsd qword ptr [rbp-0x08], xmm0
vmovsd qword ptr [rbp-0x10], xmm1
- call CORINFO_HELP_DBLREM
+ mov rax, 0xD1FFAB1E ; code for CORINFO_HELP_DBLREM
+ call [rax]CORINFO_HELP_DBLREM
vmovsd qword ptr [rbp-0x18], xmm0
vucomisd xmm0, xmm0
jp SHORT G_M14330_IG07
je SHORT G_M14330_IG08
- ;; size=28 bbWeight=0.50 PerfScore 4.00
+ ;; size=35 bbWeight=0.50 PerfScore 5.12
G_M14330_IG07:
vmovsd xmm0, qword ptr [reloc @RWD00]
jmp G_M14330_IG13
;; size=13 bbWeight=0.50 PerfScore 2.50
G_M14330_IG08:
vxorps xmm1, xmm1, xmm1
vucomisd xmm0, xmm1
jp SHORT G_M14330_IG09
jne SHORT G_M14330_IG09
vmovsd xmm1, qword ptr [rbp-0x08]
vmovd rax, xmm1
test rax, rax
jge SHORT G_M14330_IG10
vmovsd xmm0, qword ptr [reloc @RWD08]
jmp G_M14330_IG13
;; size=40 bbWeight=0.50 PerfScore 7.79
G_M14330_IG09:
vmovsd xmm1, qword ptr [rbp-0x08]
;; size=5 bbWeight=0.25 PerfScore 0.75
G_M14330_IG10:
vmovsd qword ptr [rbp-0x08], xmm1
vmovaps xmm0, xmm1
mov rax, 0xD1FFAB1E ; code for System.Math:Sign(double):int
call [rax]System.Math:Sign(double):int
vxorps xmm0, xmm0, xmm0
vcvtsi2sd xmm0, xmm0, eax
vmovsd xmm2, qword ptr [rbp-0x10]
vandps xmm1, xmm2, xmmword ptr [reloc @RWD16]
vmulsd xmm0, xmm0, xmm1
vmovsd xmm1, qword ptr [rbp-0x18]
vsubsd xmm0, xmm1, xmm0
vandps xmm3, xmm0, xmmword ptr [reloc @RWD16]
vandps xmm4, xmm1, xmmword ptr [reloc @RWD16]
vucomisd xmm3, xmm4
jp SHORT G_M14330_IG12
jne SHORT G_M14330_IG12
vmovsd xmm3, qword ptr [rbp-0x08]
vdivsd xmm4, xmm3, xmm2
vroundsd xmm2, xmm2, xmm4, 4
vandps xmm2, xmm2, xmmword ptr [reloc @RWD16]
vandps xmm4, xmm4, xmmword ptr [reloc @RWD16]
vucomisd xmm2, xmm4
ja SHORT G_M14330_IG13
;; size=116 bbWeight=0.50 PerfScore 30.92
G_M14330_IG11:
vmovaps xmm0, xmm1
jmp SHORT G_M14330_IG13
;; size=6 bbWeight=0.50 PerfScore 1.12
G_M14330_IG12:
vucomisd xmm4, xmm3
jbe SHORT G_M14330_IG11
;; size=6 bbWeight=0.50 PerfScore 1.50
G_M14330_IG13:
add rsp, 32
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
RWD00 dq FFF8000000000000h ; -nan
RWD08 dq 8000000000000000h ; -0
RWD16 dq 7FFFFFFFFFFFFFFFh, 7FFFFFFFFFFFFFFFh
-; Total bytes of code 260, prolog size 10, PerfScore 60.21, instruction count 62, allocated bytes for code 260 (MethodHash=dbe0c805) for method System.Math:IEEERemainder(double,double):double (FullOpts)
+; Total bytes of code 267, prolog size 10, PerfScore 61.33, instruction count 63, allocated bytes for code 267 (MethodHash=dbe0c805) for method System.Math:IEEERemainder(double,double):double (FullOpts)
7 (2.71 % of base) - System.MathF:IEEERemainder(float,float):float
; Assembly listing for method System.MathF:IEEERemainder(float,float):float (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 5 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T00] ( 9, 6.50) float -> [rbp-0x04] single-def
; V01 arg1 [V01,T02] ( 8, 5 ) float -> [rbp-0x08] single-def
; V02 loc0 [V02,T03] ( 7, 3.50) float -> [rbp-0x0C] spill-single-def
; V03 loc1 [V03,T04] ( 3, 1.50) float -> mm0
; V04 loc2 [V04,T05] ( 3, 1.50) float -> mm4
;* V05 loc3 [V05 ] ( 0, 0 ) float -> zero-ref
;# V06 OutArgs [V06 ] ( 1, 1 ) struct ( 0) [rsp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
; V07 tmp1 [V07,T01] ( 7, 8 ) float -> mm0 "Single return block return value"
; V08 cse0 [V08,T06] ( 3, 1.50) float -> mm3 "CSE - moderate"
; V09 cse1 [V09,T07] ( 3, 1.50) float -> mm4 "CSE - moderate"
;
; Lcl frame size = 16
G_M34681_IG01:
push rbp
sub rsp, 16
lea rbp, [rsp+0x10]
;; size=10 bbWeight=1 PerfScore 1.75
G_M34681_IG02:
vucomiss xmm0, xmm0
jp SHORT G_M34681_IG03
je SHORT G_M34681_IG04
;; size=8 bbWeight=1 PerfScore 4.00
G_M34681_IG03:
jmp G_M34681_IG13
;; size=5 bbWeight=0.50 PerfScore 1.00
G_M34681_IG04:
vucomiss xmm1, xmm1
jp SHORT G_M34681_IG05
je SHORT G_M34681_IG06
;; size=8 bbWeight=0.50 PerfScore 2.00
G_M34681_IG05:
vmovaps xmm0, xmm1
jmp G_M34681_IG13
;; size=9 bbWeight=0.50 PerfScore 1.12
G_M34681_IG06:
vmovss dword ptr [rbp-0x04], xmm0
vmovss dword ptr [rbp-0x08], xmm1
- call CORINFO_HELP_FLTREM
+ mov rax, 0xD1FFAB1E ; code for CORINFO_HELP_FLTREM
+ call [rax]CORINFO_HELP_FLTREM
vmovss dword ptr [rbp-0x0C], xmm0
vucomiss xmm0, xmm0
jp SHORT G_M34681_IG07
je SHORT G_M34681_IG08
- ;; size=28 bbWeight=0.50 PerfScore 4.00
+ ;; size=35 bbWeight=0.50 PerfScore 5.12
G_M34681_IG07:
vmovss xmm0, dword ptr [reloc @RWD00]
jmp G_M34681_IG13
;; size=13 bbWeight=0.50 PerfScore 2.50
G_M34681_IG08:
vxorps xmm1, xmm1, xmm1
vucomiss xmm0, xmm1
jp SHORT G_M34681_IG09
jne SHORT G_M34681_IG09
vmovss xmm1, dword ptr [rbp-0x04]
vmovd eax, xmm1
test eax, eax
jge SHORT G_M34681_IG10
vmovss xmm0, dword ptr [reloc @RWD04]
jmp G_M34681_IG13
;; size=38 bbWeight=0.50 PerfScore 7.79
G_M34681_IG09:
vmovss xmm1, dword ptr [rbp-0x04]
;; size=5 bbWeight=0.25 PerfScore 0.75
G_M34681_IG10:
vmovss dword ptr [rbp-0x04], xmm1
vmovaps xmm0, xmm1
mov rax, 0xD1FFAB1E ; code for System.Math:Sign(float):int
call [rax]System.Math:Sign(float):int
vxorps xmm0, xmm0, xmm0
vcvtsi2ss xmm0, xmm0, eax
vmovss xmm2, dword ptr [rbp-0x08]
vandps xmm1, xmm2, xmmword ptr [reloc @RWD16]
vmulss xmm0, xmm0, xmm1
vmovss xmm1, dword ptr [rbp-0x0C]
vsubss xmm0, xmm1, xmm0
vandps xmm3, xmm0, xmmword ptr [reloc @RWD16]
vandps xmm4, xmm1, xmmword ptr [reloc @RWD16]
vucomiss xmm3, xmm4
jp SHORT G_M34681_IG12
jne SHORT G_M34681_IG12
vmovss xmm3, dword ptr [rbp-0x04]
vdivss xmm4, xmm3, xmm2
vroundss xmm2, xmm2, xmm4, 4
vandps xmm2, xmm2, xmmword ptr [reloc @RWD16]
vandps xmm4, xmm4, xmmword ptr [reloc @RWD16]
vucomiss xmm2, xmm4
ja SHORT G_M34681_IG13
;; size=116 bbWeight=0.50 PerfScore 29.92
G_M34681_IG11:
vmovaps xmm0, xmm1
jmp SHORT G_M34681_IG13
;; size=6 bbWeight=0.50 PerfScore 1.12
G_M34681_IG12:
vucomiss xmm4, xmm3
jbe SHORT G_M34681_IG11
;; size=6 bbWeight=0.50 PerfScore 1.50
G_M34681_IG13:
add rsp, 16
pop rbp
ret
;; size=6 bbWeight=1 PerfScore 1.75
RWD00 dd FFC00000h ; -nan
RWD04 dd 80000000h ; -0
RWD08 dd 00000000h, 00000000h
RWD16 dq 7FFFFFFF7FFFFFFFh, 7FFFFFFF7FFFFFFFh
-; Total bytes of code 258, prolog size 10, PerfScore 59.21, instruction count 62, allocated bytes for code 258 (MethodHash=40f07886) for method System.MathF:IEEERemainder(float,float):float (FullOpts)
+; Total bytes of code 265, prolog size 10, PerfScore 60.33, instruction count 63, allocated bytes for code 265 (MethodHash=40f07886) for method System.MathF:IEEERemainder(float,float):float (FullOpts)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Build completed in 2 hours 22 minutes.
dotnet/runtime#98858
CoreLib diffs
Frameworks diffs
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: