-
Notifications
You must be signed in to change notification settings - Fork 5.2k
[RISC-V] Enable constant CSE in RISCV64 #121739
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch |
|
Diffs are based on 25,990 contexts (673 MinOpts, 25,317 FullOpts). MISSED contexts: base: 41 (0.16%), diff: 1 (0.00%) Overall (-35,746 bytes)
MinOpts (+0 bytes)
FullOpts (-35,746 bytes)
Example diffsSystem.Private.CoreLib.mch-40 (-38.46%) : 19577.dasm - System.ComAwareWeakReference:InitializeCallbacks(ptr,ptr,ptr) (FullOpts)@@ -11,6 +11,7 @@
; V01 arg1 [V01,T01] ( 3, 3 ) long -> a1 single-def
; V02 arg2 [V02,T02] ( 3, 3 ) long -> a2 single-def
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
+; V04 cse0 [V04,T03] ( 4, 4 ) long -> a3 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -22,32 +23,22 @@ G_M38620_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M38620_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a0, 0xD1FFAB1E(t6)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a2, 0xD1FFAB1E(t6)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a1, 0xD1FFAB1E(t6)
- ;; size=72 bbWeight=1 PerfScore 27.00
+ lui a3, 0xD1FFAB1E
+ addiw a3, a3, 0xD1FFAB1E
+ slli a3, a3, 12
+ addi a3, a3, 0xD1FFAB1E
+ slli a3, a3, 3
+ sd a0, 0xD1FFAB1E(a3)
+ sd a2, 0xD1FFAB1E(a3)
+ sd a1, 0xD1FFAB1E(a3)
+ ;; size=32 bbWeight=1 PerfScore 17.00
G_M38620_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 104, prolog size 16, PerfScore 43.50, instruction count 14, allocated bytes for code 104 (MethodHash=8ffd6923) for method System.ComAwareWeakReference:InitializeCallbacks(ptr,ptr,ptr) (FullOpts)
+; Total bytes of code 64, prolog size 16, PerfScore 33.50, instruction count 12, allocated bytes for code 64 (MethodHash=8ffd6923) for method System.ComAwareWeakReference:InitializeCallbacks(ptr,ptr,ptr) (FullOpts)
; ============================================================
Unwind Info:
@@ -58,7 +49,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 52 (0x00034) Actual length = 104 (0x000068)
+ Function Length : 32 (0x00020) Actual length = 64 (0x000040)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-40 (-33.33%) : 22849.dasm - System.DateTimeOffset:.cctor() (FullOpts)@@ -39,6 +39,7 @@
;* V28 tmp28 [V28 ] ( 0, 0 ) long -> zero-ref "field V07._dateData (fldOffset=0x0)" P-INDEP
;* V29 tmp29 [V29 ] ( 0, 0 ) long -> zero-ref "field V13._dateData (fldOffset=0x0)" P-INDEP
;* V30 tmp30 [V30 ] ( 0, 0 ) long -> zero-ref "field V15._dateData (fldOffset=0x0)" P-INDEP
+; V31 cse0 [V31,T00] ( 5, 5 ) long -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -50,29 +51,19 @@ G_M10593_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M10593_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 16
- sw zero, 0xD1FFAB1E(t6)
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
slli a0, a0, 15
addi a0, a0, 0xD1FFAB1E
+ sw zero, 0xD1FFAB1E(a0)
auipc a1, 0xD1FFAB1E
ld a1, 0xD1FFAB1E(a1)
sd a1, 0xD1FFAB1E(a0)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 16
- sw zero, 0xD1FFAB1E(t6)
- lui a0, 0xD1FFAB1E
- addiw a0, a0, 0xD1FFAB1E
- slli a0, a0, 15
- addi a0, a0, 0xD1FFAB1E
+ sw zero, 0xD1FFAB1E(a0)
auipc a1, 0xD1FFAB1E
ld a1, 0xD1FFAB1E(a1)
sd a1, 0xD1FFAB1E(a0)
- ;; size=88 bbWeight=1 PerfScore 34.00
+ ;; size=48 bbWeight=1 PerfScore 24.00
G_M10593_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
@@ -82,7 +73,7 @@ RWD00 dq 2BCA2875F4373FFFh
RWD08 dq 089F7FF5F7B58000h
-; Total bytes of code 120, prolog size 16, PerfScore 50.50, instruction count 18, allocated bytes for code 120 (MethodHash=ed90d69e) for method System.DateTimeOffset:.cctor() (FullOpts)
+; Total bytes of code 80, prolog size 16, PerfScore 40.50, instruction count 15, allocated bytes for code 80 (MethodHash=ed90d69e) for method System.DateTimeOffset:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -93,7 +84,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 60 (0x0003c) Actual length = 120 (0x000078)
+ Function Length : 40 (0x00028) Actual length = 80 (0x000050)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-40 (-32.26%) : 321.dasm - System.TimeSpan:.cctor() (FullOpts)@@ -15,6 +15,7 @@
;* V04 tmp4 [V04 ] ( 0, 0 ) long -> zero-ref "field V01._ticks (fldOffset=0x0)" P-INDEP
;* V05 tmp5 [V05 ] ( 0, 0 ) long -> zero-ref "field V02._ticks (fldOffset=0x0)" P-INDEP
;* V06 tmp6 [V06 ] ( 0, 0 ) long -> zero-ref "field V03._ticks (fldOffset=0x0)" P-INDEP
+; V07 cse0 [V07,T00] ( 4, 4 ) long -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -26,37 +27,27 @@ G_M15924_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M15924_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd zero, 0xD1FFAB1E(t6)
- addiw a0, zero, 0xD1FFAB1E
- slli a0, a0, 63
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 12
addi a0, a0, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a0, 0xD1FFAB1E(t6)
- addiw a0, zero, 0xD1FFAB1E
- slli a0, a0, 63
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a0, 0xD1FFAB1E(t6)
- ;; size=92 bbWeight=1 PerfScore 32.00
+ slli a0, a0, 3
+ sd zero, 0xD1FFAB1E(a0)
+ addiw a1, zero, 0xD1FFAB1E
+ slli a1, a1, 63
+ addi a1, a1, 0xD1FFAB1E
+ sd a1, 0xD1FFAB1E(a0)
+ addiw a1, zero, 0xD1FFAB1E
+ slli a1, a1, 63
+ sd a1, 0xD1FFAB1E(a0)
+ ;; size=52 bbWeight=1 PerfScore 22.00
G_M15924_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-; Total bytes of code 124, prolog size 16, PerfScore 48.50, instruction count 16, allocated bytes for code 124 (MethodHash=2791c1cb) for method System.TimeSpan:.cctor() (FullOpts)
+; Total bytes of code 84, prolog size 16, PerfScore 38.50, instruction count 14, allocated bytes for code 84 (MethodHash=2791c1cb) for method System.TimeSpan:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -67,7 +58,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 62 (0x0003e) Actual length = 124 (0x00007c)
+ Function Length : 42 (0x0002a) Actual length = 84 (0x000054)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+40 (+17.54%) : 133.dasm - System.Diagnostics.Tracing.RuntimeEventSource:.ctor():this (FullOpts)@@ -38,6 +38,7 @@
;* V27 tmp26 [V27 ] ( 0, 0 ) ubyte -> zero-ref "V03.[013..014)"
;* V28 tmp27 [V28 ] ( 0, 0 ) ubyte -> zero-ref "V03.[014..015)"
;* V29 tmp28 [V29 ] ( 0, 0 ) ubyte -> zero-ref "V03.[015..016)"
+; V30 cse0 [V30,T03] ( 3, 3 ) int -> a1 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=32; lcl=0
@@ -94,8 +95,18 @@ G_M4783_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
; byrRegs -[t3]
addi a1, zero, 0xD1FFAB1E
sw a1, 0xD1FFAB1E(s1)
- auipc a1, 0xD1FFAB1E
- ld a1, 0xD1FFAB1E(a1)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ lui a2, 0xD1FFAB1E
+ addiw a2, a2, 0xD1FFAB1E
+ addw a2, a1, a2
+ zext.h a2, a2
+ slli.uw a2, a2, 32
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ or a2, a0, a2
+ slli.uw a1, a1, 48
+ or a1, a2, a1
auipc a2, 0xD1FFAB1E
ld a2, 0xD1FFAB1E(a2)
mv a0, s1
@@ -112,7 +123,7 @@ G_M4783_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
addi t6, t6, 0xD1FFAB1E
slli t6, t6, 2
ld a5, 0xD1FFAB1E(t6)
- ;; size=176 bbWeight=1 PerfScore 54.50
+ ;; size=216 bbWeight=1 PerfScore 61.50
G_M4783_IG03: ; bbWeight=1, epilog, nogc, extend
ld s2, 24(sp)
ld s1, 16(sp)
@@ -121,11 +132,10 @@ G_M4783_IG03: ; bbWeight=1, epilog, nogc, extend
addi sp, sp, 32
jr a5 // <unknown method>
;; size=24 bbWeight=1 PerfScore 11.50
-RWD00 dq 516D5A0549592C0Fh
-RWD08 dq 896C02024EA64BAAh
+RWD00 dq 896C02024EA64BAAh
-; Total bytes of code 228, prolog size 28, PerfScore 83.50, instruction count 35, allocated bytes for code 228 (MethodHash=d472ed50) for method System.Diagnostics.Tracing.RuntimeEventSource:.ctor():this (FullOpts)
+; Total bytes of code 268, prolog size 28, PerfScore 90.50, instruction count 43, allocated bytes for code 268 (MethodHash=d472ed50) for method System.Diagnostics.Tracing.RuntimeEventSource:.ctor():this (FullOpts)
; ============================================================
Unwind Info:
@@ -136,7 +146,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 114 (0x00072) Actual length = 228 (0x0000e4)
+ Function Length : 134 (0x00086) Actual length = 268 (0x00010c)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+30 (+13.16%) : 152.dasm - System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)@@ -38,6 +38,7 @@
;* V27 tmp26 [V27 ] ( 0, 0 ) ubyte -> zero-ref "V03.[013..014)"
;* V28 tmp27 [V28 ] ( 0, 0 ) ubyte -> zero-ref "V03.[014..015)"
;* V29 tmp28 [V29 ] ( 0, 0 ) ubyte -> zero-ref "V03.[015..016)"
+; V30 cse0 [V30,T03] ( 3, 3 ) int -> a1 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=32; lcl=0
@@ -94,8 +95,16 @@ G_M31483_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
; byrRegs -[t3]
addi a1, zero, 0xD1FFAB1E
sw a1, 0xD1FFAB1E(s1)
- auipc a1, 0xD1FFAB1E
- ld a1, 0xD1FFAB1E(a1)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ addiw a2, a1, 0xD1FFAB1E
+ zext.h a1, a1
+ slli.uw a1, a1, 32
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ or a1, a0, a1
+ slli.uw a2, a2, 48
+ or a1, a1, a2
auipc a2, 0xD1FFAB1E
ld a2, 0xD1FFAB1E(a2)
mv a0, s1
@@ -112,7 +121,7 @@ G_M31483_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
addi t6, t6, 0xD1FFAB1E
slli t6, t6, 2
ld a5, 0xD1FFAB1E(t6)
- ;; size=176 bbWeight=1 PerfScore 54.50
+ ;; size=206 bbWeight=1 PerfScore 59.50
G_M31483_IG03: ; bbWeight=1, epilog, nogc, extend
ld s2, 24(sp)
ld s1, 16(sp)
@@ -121,11 +130,10 @@ G_M31483_IG03: ; bbWeight=1, epilog, nogc, extend
addi sp, sp, 32
jr a5 // <unknown method>
;; size=24 bbWeight=1 PerfScore 11.50
-RWD00 dq 5DB95CEF0866B2B8h
-RWD08 dq 444A81FD0F0C1226h
+RWD00 dq 444A81FD0F0C1226h
-; Total bytes of code 228, prolog size 28, PerfScore 83.50, instruction count 35, allocated bytes for code 228 (MethodHash=51938504) for method System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)
+; Total bytes of code 258, prolog size 28, PerfScore 88.50, instruction count 42, allocated bytes for code 258 (MethodHash=51938504) for method System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)
; ============================================================
Unwind Info:
@@ -136,7 +144,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 114 (0x00072) Actual length = 228 (0x0000e4)
+ Function Length : 129 (0x00081) Actual length = 258 (0x000102)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+16 (+11.76%) : 3688.dasm - System.OrdinalComparer:GetHashCode():int:this (FullOpts)@@ -12,6 +12,7 @@
; V01 loc0 [V01,T01] ( 3, 2 ) int -> a0
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;* V03 tmp1 [V03 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
+; V04 cse0 [V04,T02] ( 2, 2 ) ref -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 8
Frame info. #outsz=0; #framesz=32; lcl=8
@@ -26,9 +27,14 @@ G_M30928_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {
; gcrRegs +[s1]
;; size=24 bbWeight=1 PerfScore 13.50
G_M30928_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
addi a1, zero, 0xD1FFAB1E
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
+ addi a0, a0, 0xD1FFAB1E
+ ; byrRegs +[a0]
lui a2, 0xD1FFAB1E
addiw a2, a2, 0xD1FFAB1E
lui a3, 0xD1FFAB1E
@@ -40,10 +46,11 @@ G_M30928_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
slli t6, t6, 2
ld a4, 0xD1FFAB1E(t6)
jalr a4 // <unknown method>
+ ; byrRegs -[a0]
lbu a1, 0xD1FFAB1E(s1)
sext.w a1, a1
beqz a1, G_M30928_IG05
- ;; size=68 bbWeight=1 PerfScore 23.50
+ ;; size=84 bbWeight=1 PerfScore 27.00
G_M30928_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; gcrRegs -[s1]
not a0, a0
@@ -60,10 +67,8 @@ G_M30928_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
ld fp, 8(sp)
addi sp, sp, 32
ret ;; size=20 bbWeight=0.50 PerfScore 4.75
-RWD00 dq 00002ACB341F29D4h
-
-; Total bytes of code 136, prolog size 20, PerfScore 46.75, instruction count 27, allocated bytes for code 136 (MethodHash=f0e4872f) for method System.OrdinalComparer:GetHashCode():int:this (FullOpts)
+; Total bytes of code 152, prolog size 20, PerfScore 50.25, instruction count 28, allocated bytes for code 152 (MethodHash=f0e4872f) for method System.OrdinalComparer:GetHashCode():int:this (FullOpts)
; ============================================================
Unwind Info:
@@ -74,7 +79,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 68 (0x00044) Actual length = 136 (0x000088)
+ Function Length : 76 (0x0004c) Actual length = 152 (0x000098)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)DetailsSize improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze outputSystem.Private.CoreLib.mchDetail diffs |
|
Some regressions are caused by the failure to use PC-relative addressing. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Pull Request Overview
This PR enables constant Common Subexpression Elimination (CSE) optimization for the RISC-V64 architecture, bringing it to parity with ARM/ARM64 platforms. The optimization reduces redundant instruction sequences when loading large constants that are used multiple times.
- Extends constant CSE support from ARM/ARM64 to also include RISCV64
- Adds cost estimation logic for RISC-V64 immediate value loading in the JIT compiler
- Updates configuration constants and conditional compilation blocks to include TARGET_RISCV64
Reviewed Changes
Copilot reviewed 4 out of 4 changed files in this pull request and generated 12 comments.
| File | Description |
|---|---|
| src/coreclr/jit/optcse.cpp | Updates conditional compilation blocks and function logic to enable constant CSE for RISCV64 alongside ARM architectures |
| src/coreclr/jit/jitconfigvalues.h | Renames configuration constants from ARM-specific to ARM_RISCV64 to reflect the expanded platform support |
| src/coreclr/jit/gentree.cpp | Adds comprehensive cost estimation logic for RISC-V64 constant loading by duplicating the algorithm from emitter::emitLoadImmediate |
| src/coreclr/jit/emitriscv64.cpp | Fixes spelling error in comment ("zeroes" → "zeros") |
* Adjust costSz and costEx for GT_CNS_INT node
* Add riscv64 in const CSE jitconfigvalues
Is this something which is fixed by @tomeksowi's #119203? Could you run combined diffs (after merging his branch into yours)? |
It is not clear that it is fixed after merging: the regression is worse after the merge than before. I suspect this may be due to differences in environment variables, but I'm not really sure. The regression is actually smaller than on the base (pc-rel-pointers) after merging it with this PR (hoist-loop), so I think the regressions were not caused by this PR.
Diffs running on the base branch (pc-rel-pointers)
Diffs are based on 12,996 contexts (338 MinOpts, 12,658 FullOpts). MISSED contexts: 22 (0.17%) Base JIT options: JitEnablePCRelAddr=0 Overall (+173,608 bytes)
MinOpts (+10,528 bytes)
FullOpts (+163,080 bytes)
Example diffsSystem.Private.CoreLib.mch+0 (0.00%) : 6.dasm - System.Runtime.CompilerServices.StaticsHelpers:GetGCStaticBase(ptr):byref (FullOpts)@@ -44,11 +44,11 @@ G_M27708_IG04: ; bbWeight=0.50, epilog, nogc, extend
ret ;; size=16 bbWeight=0.50 PerfScore 3.75
G_M27708_IG05: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
; byrRegs -[a0-a1]
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a1, 0xD1FFAB1E(t6)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ slli a1, a1, 32
+ srli a1, a1, 18
+ ld a1, 0xD1FFAB1E(a1)
;; size=20 bbWeight=0.50 PerfScore 3.00
G_M27708_IG06: ; bbWeight=0.50, epilog, nogc, extend
ld ra, 8(sp)+0 (0.00%) : 10.dasm - System.RuntimeTypeHandle:GetRuntimeTypeFromHandle(nint):System.RuntimeType (FullOpts)@@ -67,11 +67,11 @@ G_M56642_IG07: ; bbWeight=0.50, epilog, nogc, extend
ret ;; size=16 bbWeight=0.50 PerfScore 3.75
G_M56642_IG08: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=0000 {}, byrefRegs=0000 {}, gcvars, byref
; gcrRegs -[a0-a1]
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a1, 0xD1FFAB1E(t6)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ slli a1, a1, 32
+ srli a1, a1, 18
+ ld a1, 0xD1FFAB1E(a1)
;; size=20 bbWeight=0.50 PerfScore 3.00
G_M56642_IG09: ; bbWeight=0.50, epilog, nogc, extend
ld ra, 8(sp)+0 (0.00%) : 14.dasm - System.Runtime.CompilerServices.CastHelpers:ChkCastClassSpecial(ptr,System.Object):System.Object (FullOpts)@@ -40,17 +40,20 @@ G_M37025_IG03: ; bbWeight=0.50, gcrefRegs=40000 {s2}, byrefRegs=0000 {},
slli a0, a0, 13
addi a0, a0, 0xD1FFAB1E
slli a0, a0, 2
+ ; gcrRegs +[a0]
lui a1, 0xD1FFAB1E
addiw a1, a1, 0xD1FFAB1E
slli a1, a1, 13
addi a1, a1, 0xD1FFAB1E
slli a1, a1, 2
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a2, 0xD1FFAB1E(t6)
+ ; gcrRegs +[a1]
+ lui a2, 0xD1FFAB1E
+ addiw a2, a2, 0xD1FFAB1E
+ slli a2, a2, 32
+ srli a2, a2, 18
+ ld a2, 0xD1FFAB1E(a2)
jalr a2 // <unknown method>
+ ; gcrRegs -[a0-a1]
; gcr arg pop 0
;; size=64 bbWeight=0.50 PerfScore 9.50
G_M37025_IG04: ; bbWeight=1.50, gcrefRegs=40000 {s2}, byrefRegs=0000 {}, byref, isz
@@ -88,12 +91,12 @@ G_M37025_IG08: ; bbWeight=0.50, gcVars=0000000000000000 {}, gcrefRegs=400
mv a0, s1
mv a1, s2
; gcrRegs +[a1]
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- ld a2, 0xD1FFAB1E(t6)
+ lui a2, 0xD1FFAB1E
+ addiw a2, a2, 0xD1FFAB1E
+ slli a2, a2, 12
+ addi a2, a2, 0xD1FFAB1E
+ slli a2, a2, 3
+ ld a2, 0xD1FFAB1E(a2)
;; size=32 bbWeight=0.50 PerfScore 4.00
G_M37025_IG09: ; bbWeight=0.50, epilog, nogc, extend
ld s3, 40(sp)+32 (+61.54%) : 18192.dasm - System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)@@ -25,16 +25,25 @@ G_M64730_IG01: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=24 bbWeight=0 PerfScore 0.00
G_M64730_IG02: ; bbWeight=0, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
ld a0, 0xD1FFAB1E(s1)
- auipc ra, 0xD1FFAB1E
- jalr ra // <unknown method>
+ lui ra, 0xD1FFAB1E
+ addiw ra, ra, 0xD1FFAB1E
+ slli ra, ra, 12
+ addi ra, ra, 0xD1FFAB1E
+ slli ra, ra, 3
+ jalr ra, 0xD1FFAB1E(ra) // <unknown method>
mv a0, zero
- auipc ra, 0xD1FFAB1E
- jalr ra // CORINFO_HELP_THROW
- ; gcrRegs -[s1]
+ ; gcrRegs +[a0]
+ lui ra, 0xD1FFAB1E
+ addiw ra, ra, 0xD1FFAB1E
+ slli ra, ra, 13
+ addi ra, ra, 0xD1FFAB1E
+ slli ra, ra, 2
+ jalr ra, 0xD1FFAB1E(ra) // CORINFO_HELP_THROW
+ ; gcrRegs -[s1-a0]
ebreak
- ;; size=28 bbWeight=0 PerfScore 0.00
+ ;; size=60 bbWeight=0 PerfScore 0.00
-; Total bytes of code 52, prolog size 20, PerfScore 0.00, instruction count 11, allocated bytes for code 52 (MethodHash=666c0325) for method System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)
+; Total bytes of code 84, prolog size 20, PerfScore 0.00, instruction count 13, allocated bytes for code 84 (MethodHash=666c0325) for method System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)
; ============================================================
Unwind Info:
@@ -45,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
+ Function Length : 42 (0x0002a) Actual length = 84 (0x000054)
---- Epilog scopes ----
No epilogs
---- Unwind codes ----+16 (+44.44%) : 23080.dasm - System.Double:Pow(double,double):double (FullOpts)@@ -24,11 +24,15 @@ G_M57204_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
- auipc t2, 0xD1FFAB1E
- jr t2 // <unknown method>
- ;; size=20 bbWeight=1 PerfScore 8.50
+ lui t6, 0xD1FFAB1E
+ addiw t6, t6, 0xD1FFAB1E
+ slli t6, t6, 12
+ addi t6, t6, 0xD1FFAB1E
+ slli t6, t6, 3
+ jalr zero, 0xD1FFAB1E(t6) // <unknown method>
+ ;; size=36 bbWeight=1 PerfScore 12.50
-; Total bytes of code 36, prolog size 16, PerfScore 17.50, instruction count 8, allocated bytes for code 36 (MethodHash=6920208b) for method System.Double:Pow(double,double):double (FullOpts)
+; Total bytes of code 52, prolog size 16, PerfScore 21.50, instruction count 9, allocated bytes for code 52 (MethodHash=6920208b) for method System.Double:Pow(double,double):double (FullOpts)
; ============================================================
Unwind Info:
@@ -39,7 +43,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 18 (0x00012) Actual length = 36 (0x000024)
+ Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+16 (+44.44%) : 23020.dasm - System.Double:Log10(double):double (FullOpts)@@ -23,11 +23,15 @@ G_M51936_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
- auipc t2, 0xD1FFAB1E
- jr t2 // <unknown method>
- ;; size=20 bbWeight=1 PerfScore 8.50
+ lui t6, 0xD1FFAB1E
+ addiw t6, t6, 0xD1FFAB1E
+ slli t6, t6, 12
+ addi t6, t6, 0xD1FFAB1E
+ slli t6, t6, 3
+ jalr zero, 0xD1FFAB1E(t6) // <unknown method>
+ ;; size=36 bbWeight=1 PerfScore 12.50
-; Total bytes of code 36, prolog size 16, PerfScore 17.50, instruction count 8, allocated bytes for code 36 (MethodHash=5391351f) for method System.Double:Log10(double):double (FullOpts)
+; Total bytes of code 52, prolog size 16, PerfScore 21.50, instruction count 9, allocated bytes for code 52 (MethodHash=5391351f) for method System.Double:Log10(double):double (FullOpts)
; ============================================================
Unwind Info:
@@ -38,7 +42,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 18 (0x00012) Actual length = 36 (0x000024)
+ Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)DetailsSize improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output
Diffs running after merging the base branch (pc-rel-pointers) into this PR (hoist-loop)
Diffs are based on 12,996 contexts (338 MinOpts, 12,658 FullOpts). MISSED contexts: base: 22 (0.17%), diff: 1 (0.01%) Base JIT options: JitEnablePCRelAddr=0 Overall (+155,002 bytes)
MinOpts (+10,528 bytes)
FullOpts (+144,474 bytes)
Example diffsSystem.Private.CoreLib.mch-44 (-31.43%) : 18642.dasm - System.Diagnostics.Stopwatch:.cctor() (FullOpts)@@ -8,6 +8,7 @@
; Final local variable assignments
;
;# V00 OutArgs [V00 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
+; V01 cse0 [V01,T00] ( 5, 5 ) long -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -21,32 +22,21 @@ G_M2214_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
G_M2214_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- sd a0, 0xD1FFAB1E(t6)
- addi a0, zero, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- sb a0, 0xD1FFAB1E(t6)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a0, 0xD1FFAB1E(t6)
- fcvt.d.l ft4, a0
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ sd a1, 0xD1FFAB1E(a0)
+ addi a1, zero, 0xD1FFAB1E
+ sb a1, 0xD1FFAB1E(a0)
+ ld a1, 0xD1FFAB1E(a0)
+ fcvt.d.l ft4, a1
auipc t6, 0xD1FFAB1E
fld ft5, 0xD1FFAB1E(t6)
fdiv.d ft4, ft5, ft4
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- fsd ft4, 0xD1FFAB1E(t6)
- ;; size=108 bbWeight=1 PerfScore 72.50
+ fsd ft4, 0xD1FFAB1E(a0)
+ ;; size=64 bbWeight=1 PerfScore 61.50
G_M2214_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
@@ -55,7 +45,7 @@ G_M2214_IG03: ; bbWeight=1, epilog, nogc, extend
RWD00 dq 416312D000000000h ; 10000000
-; Total bytes of code 140, prolog size 16, PerfScore 89.00, instruction count 21, allocated bytes for code 140 (MethodHash=b3f5f759) for method System.Diagnostics.Stopwatch:.cctor() (FullOpts)
+; Total bytes of code 96, prolog size 16, PerfScore 78.00, instruction count 18, allocated bytes for code 96 (MethodHash=b3f5f759) for method System.Diagnostics.Stopwatch:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -66,7 +56,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 70 (0x00046) Actual length = 140 (0x00008c)
+ Function Length : 48 (0x00030) Actual length = 96 (0x000060)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-44 (-29.33%) : 3888.dasm - System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)@@ -8,15 +8,15 @@
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
-; V00 this [V00,T03] ( 3, 3 ) byref -> a0 this single-def
-; V01 arg1 [V01,T00] ( 5, 5 ) long -> a1 single-def
-; V02 arg2 [V02,T04] ( 3, 3 ) byref -> a2 single-def
-; V03 loc0 [V03,T06] ( 4, 3 ) long -> a4
-; V04 loc1 [V04,T02] ( 8, 5.50) long -> a0
+; V00 this [V00,T02] ( 3, 3 ) byref -> a0 this single-def
+; V01 arg1 [V01,T00] ( 4, 4 ) long -> a1 single-def
+; V02 arg2 [V02,T03] ( 3, 3 ) byref -> a2 single-def
+; V03 loc0 [V03,T06] ( 4, 3 ) long -> a5
+; V04 loc1 [V04,T01] ( 8, 5.50) long -> a0
;# V05 OutArgs [V05 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;* V06 tmp1 [V06 ] ( 0, 0 ) struct (16) zero-ref "dup spill" <System.ValueTuple`2[long,long]>
;* V07 tmp2 [V07 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op "NewObj constructor temp" <System.TimeOnly>
-; V08 tmp3 [V08,T05] ( 5, 4 ) long -> a3 "Inline stloc first use temp"
+; V08 tmp3 [V08,T04] ( 5, 4 ) long -> a4 "Inline stloc first use temp"
;* V09 tmp4 [V09 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[long,long]>
;* V10 tmp5 [V10 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V11 tmp6 [V11 ] ( 0, 0 ) long -> zero-ref "field V06.Item1 (fldOffset=0x0)" P-INDEP
@@ -24,7 +24,7 @@
;* V13 tmp8 [V13 ] ( 0, 0 ) long -> zero-ref "field V07._ticks (fldOffset=0x0)" P-INDEP
;* V14 tmp9 [V14 ] ( 0, 0 ) long -> zero-ref "field V09.Item1 (fldOffset=0x0)" P-INDEP
; V15 tmp10 [V15,T07] ( 2, 2 ) long -> a1 "field V09.Item2 (fldOffset=0x8)" P-INDEP
-; V16 rat0 [V16,T01] ( 3, 6 ) long -> a3 "ReplaceWithLclVar is creating a new local variable"
+; V16 cse0 [V16,T05] ( 5, 4 ) long -> a3 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -37,55 +37,41 @@ G_M56108_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=16 bbWeight=1 PerfScore 9.00
G_M56108_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=1400 {a0 a2}, byref, isz
; byrRegs +[a0 a2]
- auipc a3, 0xD1FFAB1E
- ld a3, 0xD1FFAB1E(a3)
- mulh a3, a3, a1
- add a3, a3, a1
- srli a4, a3, 63
- srai a3, a3, 39
- add a3, a3, a4
- lui a4, 0xD1FFAB1E
- addiw a4, a4, 0xD1FFAB1E
- slli a4, a4, 14
- mul a4, a3, a4
- sub a1, a1, a4
- mv a4, a3
+ lui a3, 0xD1FFAB1E
+ addiw a3, a3, 0xD1FFAB1E
+ slli a3, a3, 14
+ div a4, a1, a3
+ mul a5, a4, a3
+ sub a1, a1, a5
+ mv a5, a4
ld a0, 0xD1FFAB1E(a0)
; byrRegs -[a0]
add a0, a1, a0
blt a0, zero, G_M56108_IG04
- ;; size=58 bbWeight=1 PerfScore 18.00
+ ;; size=38 bbWeight=1 PerfScore 49.00
G_M56108_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref, isz
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 14
- blt a0, a1, G_M56108_IG05
- addi a4, a3, 0xD1FFAB1E
- lui a3, 0xD1FFAB1E
- addiw a3, a3, 0xD1FFAB1E
- slli a3, a3, 14
- add a0, a0, a3
+ blt a0, a3, G_M56108_IG05
+ addi a5, a4, 0xD1FFAB1E
+ lui a4, 0xD1FFAB1E
+ addiw a4, a4, 0xD1FFAB1E
+ slli a4, a4, 14
+ add a0, a0, a4
j G_M56108_IG05
- ;; size=38 bbWeight=0.50 PerfScore 6.00
+ ;; size=26 bbWeight=0.50 PerfScore 4.50
G_M56108_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref
- addi a4, a3, 0xD1FFAB1E
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 14
- add a0, a0, a1
- ;; size=18 bbWeight=0.50 PerfScore 2.00
+ addi a5, a4, 0xD1FFAB1E
+ add a0, a0, a3
+ ;; size=6 bbWeight=0.50 PerfScore 0.50
G_M56108_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref
- sw a4, 0xD1FFAB1E(a2)
+ sw a5, 0xD1FFAB1E(a2)
;; size=4 bbWeight=1 PerfScore 4.00
G_M56108_IG06: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-RWD00 dq A2E3FF1DE20581E3h
-
-; Total bytes of code 150, prolog size 16, PerfScore 46.50, instruction count 31, allocated bytes for code 150 (MethodHash=dbbf24d3) for method System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)
+; Total bytes of code 106, prolog size 16, PerfScore 74.50, instruction count 24, allocated bytes for code 106 (MethodHash=dbbf24d3) for method System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)
; ============================================================
Unwind Info:
@@ -96,7 +82,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 75 (0x0004b) Actual length = 150 (0x000096)
+ Function Length : 53 (0x00035) Actual length = 106 (0x00006a)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-40 (-25.64%) : 6924.dasm - System.Globalization.PersianCalendar:.cctor() (FullOpts)@@ -28,8 +28,8 @@
;* V17 tmp16 [V17 ] ( 0, 0 ) int -> zero-ref "dup spill"
;* V18 tmp17 [V18 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V19 tmp18 [V19 ] ( 0, 0 ) long -> zero-ref "field V00._dateData (fldOffset=0x0)" P-INDEP
-;* V20 tmp19 [V20,T00] ( 0, 0 ) long -> zero-ref "field V02._dateData (fldOffset=0x0)" P-INDEP
-;* V21 tmp20 [V21,T01] ( 0, 0 ) long -> zero-ref "field V03._dateData (fldOffset=0x0)" P-INDEP
+;* V20 tmp19 [V20,T03] ( 0, 0 ) long -> zero-ref "field V02._dateData (fldOffset=0x0)" P-INDEP
+;* V21 tmp20 [V21,T04] ( 0, 0 ) long -> zero-ref "field V03._dateData (fldOffset=0x0)" P-INDEP
;* V22 tmp21 [V22 ] ( 0, 0 ) long -> zero-ref "field V04._dateData (fldOffset=0x0)" P-INDEP
;* V23 tmp22 [V23 ] ( 0, 0 ) byref -> zero-ref single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V24 tmp23 [V24 ] ( 0, 0 ) int -> zero-ref "field V05._length (fldOffset=0x8)" P-INDEP
@@ -47,6 +47,11 @@
;* V36 tmp35 [V36 ] ( 0, 0 ) int -> zero-ref "field V15._length (fldOffset=0x8)" P-INDEP
;* V37 tmp36 [V37 ] ( 0, 0 ) byref -> zero-ref "field V16._reference (fldOffset=0x0)" P-INDEP
;* V38 tmp37 [V38 ] ( 0, 0 ) int -> zero-ref "field V16._length (fldOffset=0x8)" P-INDEP
+; V39 cse0 [V39,T00] ( 3, 3 ) long -> a0 "CSE #01: aggressive"
+;* V40 cse1 [V40,T02] ( 0, 0 ) byref -> zero-ref "CSE #02: aggressive"
+; V41 cse2 [V41,T01] ( 3, 3 ) long -> a0 "CSE #05: aggressive"
+;* V42 cse3 [V42,T05] ( 0, 0 ) long -> zero-ref "CSE #04: aggressive"
+;* V43 cse4 [V43,T06] ( 0, 0 ) int -> zero-ref "CSE #03: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -58,38 +63,28 @@ G_M6342_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M6342_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- addi a0, zero, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sw a0, 0xD1FFAB1E(t6)
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a0, 0xD1FFAB1E(t6)
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- sd a0, 0xD1FFAB1E(t6)
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- sd a0, 0xD1FFAB1E(t6)
- ;; size=124 bbWeight=1 PerfScore 42.50
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
+ addi a1, zero, 0xD1FFAB1E
+ sw a1, 0xD1FFAB1E(a0)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ sd a1, 0xD1FFAB1E(a0)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 13
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 2
+ auipc a1, 0xD1FFAB1E
+ ld a1, 0xD1FFAB1E(a1)
+ sd a1, 0xD1FFAB1E(a0)
+ auipc a1, 0xD1FFAB1E
+ ld a1, 0xD1FFAB1E(a1)
+ sd a1, 0xD1FFAB1E(a0)
+ ;; size=84 bbWeight=1 PerfScore 32.50
G_M6342_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
@@ -99,7 +94,7 @@ RWD00 dq 02B876DE15224000h
RWD08 dq 2BCA2875F4373FFFh
-; Total bytes of code 156, prolog size 16, PerfScore 59.00, instruction count 20, allocated bytes for code 156 (MethodHash=a9d8e739) for method System.Globalization.PersianCalendar:.cctor() (FullOpts)
+; Total bytes of code 116, prolog size 16, PerfScore 49.00, instruction count 18, allocated bytes for code 116 (MethodHash=a9d8e739) for method System.Globalization.PersianCalendar:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -110,7 +105,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 78 (0x0004e) Actual length = 156 (0x00009c)
+ Function Length : 58 (0x0003a) Actual length = 116 (0x000074)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+32 (+61.54%) : 18192.dasm - System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)@@ -25,16 +25,25 @@ G_M64730_IG01: ; bbWeight=0, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=24 bbWeight=0 PerfScore 0.00
G_M64730_IG02: ; bbWeight=0, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
ld a0, 0xD1FFAB1E(s1)
- auipc ra, 0xD1FFAB1E
- jalr ra // <unknown method>
+ lui ra, 0xD1FFAB1E
+ addiw ra, ra, 0xD1FFAB1E
+ slli ra, ra, 12
+ addi ra, ra, 0xD1FFAB1E
+ slli ra, ra, 3
+ jalr ra, 0xD1FFAB1E(ra) // <unknown method>
mv a0, zero
- auipc ra, 0xD1FFAB1E
- jalr ra // CORINFO_HELP_THROW
- ; gcrRegs -[s1]
+ ; gcrRegs +[a0]
+ lui ra, 0xD1FFAB1E
+ addiw ra, ra, 0xD1FFAB1E
+ slli ra, ra, 13
+ addi ra, ra, 0xD1FFAB1E
+ slli ra, ra, 2
+ jalr ra, 0xD1FFAB1E(ra) // CORINFO_HELP_THROW
+ ; gcrRegs -[s1-a0]
ebreak
- ;; size=28 bbWeight=0 PerfScore 0.00
+ ;; size=60 bbWeight=0 PerfScore 0.00
-; Total bytes of code 52, prolog size 20, PerfScore 0.00, instruction count 11, allocated bytes for code 52 (MethodHash=666c0325) for method System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)
+; Total bytes of code 84, prolog size 20, PerfScore 0.00, instruction count 13, allocated bytes for code 84 (MethodHash=666c0325) for method System.Runtime.CompilerServices.CastHelpers:ThrowInvalidCastException(System.Object,ptr) (FullOpts)
; ============================================================
Unwind Info:
@@ -45,7 +54,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
+ Function Length : 42 (0x0002a) Actual length = 84 (0x000054)
---- Epilog scopes ----
No epilogs
---- Unwind codes ----+16 (+44.44%) : 23080.dasm - System.Double:Pow(double,double):double (FullOpts)@@ -24,11 +24,15 @@ G_M57204_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
- auipc t2, 0xD1FFAB1E
- jr t2 // <unknown method>
- ;; size=20 bbWeight=1 PerfScore 8.50
+ lui t6, 0xD1FFAB1E
+ addiw t6, t6, 0xD1FFAB1E
+ slli t6, t6, 12
+ addi t6, t6, 0xD1FFAB1E
+ slli t6, t6, 3
+ jalr zero, 0xD1FFAB1E(t6) // <unknown method>
+ ;; size=36 bbWeight=1 PerfScore 12.50
-; Total bytes of code 36, prolog size 16, PerfScore 17.50, instruction count 8, allocated bytes for code 36 (MethodHash=6920208b) for method System.Double:Pow(double,double):double (FullOpts)
+; Total bytes of code 52, prolog size 16, PerfScore 21.50, instruction count 9, allocated bytes for code 52 (MethodHash=6920208b) for method System.Double:Pow(double,double):double (FullOpts)
; ============================================================
Unwind Info:
@@ -39,7 +43,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 18 (0x00012) Actual length = 36 (0x000024)
+ Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+16 (+44.44%) : 23020.dasm - System.Double:Log10(double):double (FullOpts)@@ -23,11 +23,15 @@ G_M51936_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
- auipc t2, 0xD1FFAB1E
- jr t2 // <unknown method>
- ;; size=20 bbWeight=1 PerfScore 8.50
+ lui t6, 0xD1FFAB1E
+ addiw t6, t6, 0xD1FFAB1E
+ slli t6, t6, 12
+ addi t6, t6, 0xD1FFAB1E
+ slli t6, t6, 3
+ jalr zero, 0xD1FFAB1E(t6) // <unknown method>
+ ;; size=36 bbWeight=1 PerfScore 12.50
-; Total bytes of code 36, prolog size 16, PerfScore 17.50, instruction count 8, allocated bytes for code 36 (MethodHash=5391351f) for method System.Double:Log10(double):double (FullOpts)
+; Total bytes of code 52, prolog size 16, PerfScore 21.50, instruction count 9, allocated bytes for code 52 (MethodHash=5391351f) for method System.Double:Log10(double):double (FullOpts)
; ============================================================
Unwind Info:
@@ -38,7 +42,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 18 (0x00012) Actual length = 36 (0x000024)
+ Function Length : 26 (0x0001a) Actual length = 52 (0x000034)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)DetailsSize improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze outputDiffs running on this PR(hoist-loop)Diffs are based on 12,996 contexts (338 MinOpts, 12,658 FullOpts). MISSED contexts: base: 22 (0.17%), diff: 1 (0.01%) Overall (-18,586 bytes)
MinOpts (+0 bytes)
FullOpts (-18,586 bytes)
Example diffsSystem.Private.CoreLib.mch-44 (-31.43%) : 18642.dasm - System.Diagnostics.Stopwatch:.cctor() (FullOpts)@@ -8,6 +8,7 @@
; Final local variable assignments
;
;# V00 OutArgs [V00 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
+; V01 cse0 [V01,T00] ( 5, 5 ) long -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -21,32 +22,21 @@ G_M2214_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
G_M2214_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- sd a0, 0xD1FFAB1E(t6)
- addi a0, zero, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- sb a0, 0xD1FFAB1E(t6)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- ld a0, 0xD1FFAB1E(t6)
- fcvt.d.l ft4, a0
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ sd a1, 0xD1FFAB1E(a0)
+ addi a1, zero, 0xD1FFAB1E
+ sb a1, 0xD1FFAB1E(a0)
+ ld a1, 0xD1FFAB1E(a0)
+ fcvt.d.l ft4, a1
auipc t6, 0xD1FFAB1E
fld ft5, 0xD1FFAB1E(t6)
fdiv.d ft4, ft5, ft4
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 32
- srli t6, t6, 18
- fsd ft4, 0xD1FFAB1E(t6)
- ;; size=108 bbWeight=1 PerfScore 72.50
+ fsd ft4, 0xD1FFAB1E(a0)
+ ;; size=64 bbWeight=1 PerfScore 61.50
G_M2214_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
@@ -55,7 +45,7 @@ G_M2214_IG03: ; bbWeight=1, epilog, nogc, extend
RWD00 dq 416312D000000000h ; 10000000
-; Total bytes of code 140, prolog size 16, PerfScore 89.00, instruction count 21, allocated bytes for code 140 (MethodHash=b3f5f759) for method System.Diagnostics.Stopwatch:.cctor() (FullOpts)
+; Total bytes of code 96, prolog size 16, PerfScore 78.00, instruction count 18, allocated bytes for code 96 (MethodHash=b3f5f759) for method System.Diagnostics.Stopwatch:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -66,7 +56,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 70 (0x00046) Actual length = 140 (0x00008c)
+ Function Length : 48 (0x00030) Actual length = 96 (0x000060)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-44 (-29.33%) : 3888.dasm - System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)@@ -8,15 +8,15 @@
; 0 inlinees with PGO data; 3 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
-; V00 this [V00,T03] ( 3, 3 ) byref -> a0 this single-def
-; V01 arg1 [V01,T00] ( 5, 5 ) long -> a1 single-def
-; V02 arg2 [V02,T04] ( 3, 3 ) byref -> a2 single-def
-; V03 loc0 [V03,T06] ( 4, 3 ) long -> a4
-; V04 loc1 [V04,T02] ( 8, 5.50) long -> a0
+; V00 this [V00,T02] ( 3, 3 ) byref -> a0 this single-def
+; V01 arg1 [V01,T00] ( 4, 4 ) long -> a1 single-def
+; V02 arg2 [V02,T03] ( 3, 3 ) byref -> a2 single-def
+; V03 loc0 [V03,T06] ( 4, 3 ) long -> a5
+; V04 loc1 [V04,T01] ( 8, 5.50) long -> a0
;# V05 OutArgs [V05 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;* V06 tmp1 [V06 ] ( 0, 0 ) struct (16) zero-ref "dup spill" <System.ValueTuple`2[long,long]>
;* V07 tmp2 [V07 ] ( 0, 0 ) struct ( 8) zero-ref ld-addr-op "NewObj constructor temp" <System.TimeOnly>
-; V08 tmp3 [V08,T05] ( 5, 4 ) long -> a3 "Inline stloc first use temp"
+; V08 tmp3 [V08,T04] ( 5, 4 ) long -> a4 "Inline stloc first use temp"
;* V09 tmp4 [V09 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "NewObj constructor temp" <System.ValueTuple`2[long,long]>
;* V10 tmp5 [V10 ] ( 0, 0 ) long -> zero-ref "Inlining Arg"
;* V11 tmp6 [V11 ] ( 0, 0 ) long -> zero-ref "field V06.Item1 (fldOffset=0x0)" P-INDEP
@@ -24,7 +24,7 @@
;* V13 tmp8 [V13 ] ( 0, 0 ) long -> zero-ref "field V07._ticks (fldOffset=0x0)" P-INDEP
;* V14 tmp9 [V14 ] ( 0, 0 ) long -> zero-ref "field V09.Item1 (fldOffset=0x0)" P-INDEP
; V15 tmp10 [V15,T07] ( 2, 2 ) long -> a1 "field V09.Item2 (fldOffset=0x8)" P-INDEP
-; V16 rat0 [V16,T01] ( 3, 6 ) long -> a3 "ReplaceWithLclVar is creating a new local variable"
+; V16 cse0 [V16,T05] ( 5, 4 ) long -> a3 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -37,55 +37,41 @@ G_M56108_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
;; size=16 bbWeight=1 PerfScore 9.00
G_M56108_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=1400 {a0 a2}, byref, isz
; byrRegs +[a0 a2]
- auipc a3, 0xD1FFAB1E
- ld a3, 0xD1FFAB1E(a3)
- mulh a3, a3, a1
- add a3, a3, a1
- srli a4, a3, 63
- srai a3, a3, 39
- add a3, a3, a4
- lui a4, 0xD1FFAB1E
- addiw a4, a4, 0xD1FFAB1E
- slli a4, a4, 14
- mul a4, a3, a4
- sub a1, a1, a4
- mv a4, a3
+ lui a3, 0xD1FFAB1E
+ addiw a3, a3, 0xD1FFAB1E
+ slli a3, a3, 14
+ div a4, a1, a3
+ mul a5, a4, a3
+ sub a1, a1, a5
+ mv a5, a4
ld a0, 0xD1FFAB1E(a0)
; byrRegs -[a0]
add a0, a1, a0
blt a0, zero, G_M56108_IG04
- ;; size=58 bbWeight=1 PerfScore 18.00
+ ;; size=38 bbWeight=1 PerfScore 49.00
G_M56108_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref, isz
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 14
- blt a0, a1, G_M56108_IG05
- addi a4, a3, 0xD1FFAB1E
- lui a3, 0xD1FFAB1E
- addiw a3, a3, 0xD1FFAB1E
- slli a3, a3, 14
- add a0, a0, a3
+ blt a0, a3, G_M56108_IG05
+ addi a5, a4, 0xD1FFAB1E
+ lui a4, 0xD1FFAB1E
+ addiw a4, a4, 0xD1FFAB1E
+ slli a4, a4, 14
+ add a0, a0, a4
j G_M56108_IG05
- ;; size=38 bbWeight=0.50 PerfScore 6.00
+ ;; size=26 bbWeight=0.50 PerfScore 4.50
G_M56108_IG04: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref
- addi a4, a3, 0xD1FFAB1E
- lui a1, 0xD1FFAB1E
- addiw a1, a1, 0xD1FFAB1E
- slli a1, a1, 14
- add a0, a0, a1
- ;; size=18 bbWeight=0.50 PerfScore 2.00
+ addi a5, a4, 0xD1FFAB1E
+ add a0, a0, a3
+ ;; size=6 bbWeight=0.50 PerfScore 0.50
G_M56108_IG05: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=1000 {a2}, byref
- sw a4, 0xD1FFAB1E(a2)
+ sw a5, 0xD1FFAB1E(a2)
;; size=4 bbWeight=1 PerfScore 4.00
G_M56108_IG06: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
addi sp, sp, 16
ret ;; size=16 bbWeight=1 PerfScore 7.50
-RWD00 dq A2E3FF1DE20581E3h
-
-; Total bytes of code 150, prolog size 16, PerfScore 46.50, instruction count 31, allocated bytes for code 150 (MethodHash=dbbf24d3) for method System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)
+; Total bytes of code 106, prolog size 16, PerfScore 74.50, instruction count 24, allocated bytes for code 106 (MethodHash=dbbf24d3) for method System.TimeOnly:AddTicks(long,byref):System.TimeOnly:this (FullOpts)
; ============================================================
Unwind Info:
@@ -96,7 +82,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 75 (0x0004b) Actual length = 150 (0x000096)
+ Function Length : 53 (0x00035) Actual length = 106 (0x00006a)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)-40 (-25.64%) : 6924.dasm - System.Globalization.PersianCalendar:.cctor() (FullOpts)@@ -28,8 +28,8 @@
;* V17 tmp16 [V17 ] ( 0, 0 ) int -> zero-ref "dup spill"
;* V18 tmp17 [V18 ] ( 0, 0 ) int -> zero-ref "Inline stloc first use temp"
;* V19 tmp18 [V19 ] ( 0, 0 ) long -> zero-ref "field V00._dateData (fldOffset=0x0)" P-INDEP
-;* V20 tmp19 [V20,T00] ( 0, 0 ) long -> zero-ref "field V02._dateData (fldOffset=0x0)" P-INDEP
-;* V21 tmp20 [V21,T01] ( 0, 0 ) long -> zero-ref "field V03._dateData (fldOffset=0x0)" P-INDEP
+;* V20 tmp19 [V20,T03] ( 0, 0 ) long -> zero-ref "field V02._dateData (fldOffset=0x0)" P-INDEP
+;* V21 tmp20 [V21,T04] ( 0, 0 ) long -> zero-ref "field V03._dateData (fldOffset=0x0)" P-INDEP
;* V22 tmp21 [V22 ] ( 0, 0 ) long -> zero-ref "field V04._dateData (fldOffset=0x0)" P-INDEP
;* V23 tmp22 [V23 ] ( 0, 0 ) byref -> zero-ref single-def "field V05._reference (fldOffset=0x0)" P-INDEP
;* V24 tmp23 [V24 ] ( 0, 0 ) int -> zero-ref "field V05._length (fldOffset=0x8)" P-INDEP
@@ -47,6 +47,11 @@
;* V36 tmp35 [V36 ] ( 0, 0 ) int -> zero-ref "field V15._length (fldOffset=0x8)" P-INDEP
;* V37 tmp36 [V37 ] ( 0, 0 ) byref -> zero-ref "field V16._reference (fldOffset=0x0)" P-INDEP
;* V38 tmp37 [V38 ] ( 0, 0 ) int -> zero-ref "field V16._length (fldOffset=0x8)" P-INDEP
+; V39 cse0 [V39,T00] ( 3, 3 ) long -> a0 "CSE #01: aggressive"
+;* V40 cse1 [V40,T02] ( 0, 0 ) byref -> zero-ref "CSE #02: aggressive"
+; V41 cse2 [V41,T01] ( 3, 3 ) long -> a0 "CSE #05: aggressive"
+;* V42 cse3 [V42,T05] ( 0, 0 ) long -> zero-ref "CSE #04: aggressive"
+;* V43 cse4 [V43,T06] ( 0, 0 ) int -> zero-ref "CSE #03: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=16; lcl=0
@@ -58,38 +63,28 @@ G_M6342_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref,
mv fp, sp
;; size=16 bbWeight=1 PerfScore 9.00
G_M6342_IG02: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
- addi a0, zero, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sw a0, 0xD1FFAB1E(t6)
lui a0, 0xD1FFAB1E
addiw a0, a0, 0xD1FFAB1E
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 12
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 3
- sd a0, 0xD1FFAB1E(t6)
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- sd a0, 0xD1FFAB1E(t6)
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
- lui t6, 0xD1FFAB1E
- addiw t6, t6, 0xD1FFAB1E
- slli t6, t6, 13
- addi t6, t6, 0xD1FFAB1E
- slli t6, t6, 2
- sd a0, 0xD1FFAB1E(t6)
- ;; size=124 bbWeight=1 PerfScore 42.50
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
+ addi a1, zero, 0xD1FFAB1E
+ sw a1, 0xD1FFAB1E(a0)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ sd a1, 0xD1FFAB1E(a0)
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 13
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 2
+ auipc a1, 0xD1FFAB1E
+ ld a1, 0xD1FFAB1E(a1)
+ sd a1, 0xD1FFAB1E(a0)
+ auipc a1, 0xD1FFAB1E
+ ld a1, 0xD1FFAB1E(a1)
+ sd a1, 0xD1FFAB1E(a0)
+ ;; size=84 bbWeight=1 PerfScore 32.50
G_M6342_IG03: ; bbWeight=1, epilog, nogc, extend
ld ra, 8(sp)
ld fp, 0(sp)
@@ -99,7 +94,7 @@ RWD00 dq 02B876DE15224000h
RWD08 dq 2BCA2875F4373FFFh
-; Total bytes of code 156, prolog size 16, PerfScore 59.00, instruction count 20, allocated bytes for code 156 (MethodHash=a9d8e739) for method System.Globalization.PersianCalendar:.cctor() (FullOpts)
+; Total bytes of code 116, prolog size 16, PerfScore 49.00, instruction count 18, allocated bytes for code 116 (MethodHash=a9d8e739) for method System.Globalization.PersianCalendar:.cctor() (FullOpts)
; ============================================================
Unwind Info:
@@ -110,7 +105,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 78 (0x0004e) Actual length = 156 (0x00009c)
+ Function Length : 58 (0x0003a) Actual length = 116 (0x000074)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+30 (+13.16%) : 152.dasm - System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)@@ -38,6 +38,7 @@
;* V27 tmp26 [V27 ] ( 0, 0 ) ubyte -> zero-ref "V03.[013..014)"
;* V28 tmp27 [V28 ] ( 0, 0 ) ubyte -> zero-ref "V03.[014..015)"
;* V29 tmp28 [V29 ] ( 0, 0 ) ubyte -> zero-ref "V03.[015..016)"
+; V30 cse0 [V30,T03] ( 3, 3 ) int -> a1 "CSE #01: aggressive"
;
; Lcl frame size = 0
Frame info. #outsz=0; #framesz=32; lcl=0
@@ -94,8 +95,16 @@ G_M31483_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
; byrRegs -[t3]
addi a1, zero, 0xD1FFAB1E
sw a1, 0xD1FFAB1E(s1)
- auipc a1, 0xD1FFAB1E
- ld a1, 0xD1FFAB1E(a1)
+ lui a1, 0xD1FFAB1E
+ addiw a1, a1, 0xD1FFAB1E
+ addiw a2, a1, 0xD1FFAB1E
+ zext.h a1, a1
+ slli.uw a1, a1, 32
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ or a1, a0, a1
+ slli.uw a2, a2, 48
+ or a1, a1, a2
auipc a2, 0xD1FFAB1E
ld a2, 0xD1FFAB1E(a2)
mv a0, s1
@@ -112,7 +121,7 @@ G_M31483_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
addi t6, t6, 0xD1FFAB1E
slli t6, t6, 2
ld a5, 0xD1FFAB1E(t6)
- ;; size=176 bbWeight=1 PerfScore 54.50
+ ;; size=206 bbWeight=1 PerfScore 59.50
G_M31483_IG03: ; bbWeight=1, epilog, nogc, extend
ld s2, 24(sp)
ld s1, 16(sp)
@@ -121,11 +130,10 @@ G_M31483_IG03: ; bbWeight=1, epilog, nogc, extend
addi sp, sp, 32
jr a5 // <unknown method>
;; size=24 bbWeight=1 PerfScore 11.50
-RWD00 dq 5DB95CEF0866B2B8h
-RWD08 dq 444A81FD0F0C1226h
+RWD00 dq 444A81FD0F0C1226h
-; Total bytes of code 228, prolog size 28, PerfScore 83.50, instruction count 35, allocated bytes for code 228 (MethodHash=51938504) for method System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)
+; Total bytes of code 258, prolog size 28, PerfScore 88.50, instruction count 42, allocated bytes for code 258 (MethodHash=51938504) for method System.Buffers.ArrayPoolEventSource:.ctor():this (FullOpts)
; ============================================================
Unwind Info:
@@ -136,7 +144,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 114 (0x00072) Actual length = 228 (0x0000e4)
+ Function Length : 129 (0x00081) Actual length = 258 (0x000102)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+16 (+11.76%) : 3688.dasm - System.OrdinalComparer:GetHashCode():int:this (FullOpts)@@ -12,6 +12,7 @@
; V01 loc0 [V01,T01] ( 3, 2 ) int -> a0
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
;* V03 tmp1 [V03 ] ( 0, 0 ) long -> zero-ref "Inline stloc first use temp"
+; V04 cse0 [V04,T02] ( 2, 2 ) ref -> a0 "CSE #01: aggressive"
;
; Lcl frame size = 8
Frame info. #outsz=0; #framesz=32; lcl=8
@@ -26,9 +27,14 @@ G_M30928_IG01: ; bbWeight=1, gcVars=0000000000000000 {}, gcrefRegs=0000 {
; gcrRegs +[s1]
;; size=24 bbWeight=1 PerfScore 13.50
G_M30928_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref, isz
+ lui a0, 0xD1FFAB1E
+ addiw a0, a0, 0xD1FFAB1E
+ slli a0, a0, 12
+ addi a0, a0, 0xD1FFAB1E
+ slli a0, a0, 3
addi a1, zero, 0xD1FFAB1E
- auipc a0, 0xD1FFAB1E
- ld a0, 0xD1FFAB1E(a0)
+ addi a0, a0, 0xD1FFAB1E
+ ; byrRegs +[a0]
lui a2, 0xD1FFAB1E
addiw a2, a2, 0xD1FFAB1E
lui a3, 0xD1FFAB1E
@@ -40,10 +46,11 @@ G_M30928_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
slli t6, t6, 2
ld a4, 0xD1FFAB1E(t6)
jalr a4 // <unknown method>
+ ; byrRegs -[a0]
lbu a1, 0xD1FFAB1E(s1)
sext.w a1, a1
beqz a1, G_M30928_IG05
- ;; size=68 bbWeight=1 PerfScore 23.50
+ ;; size=84 bbWeight=1 PerfScore 27.00
G_M30928_IG03: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref
; gcrRegs -[s1]
not a0, a0
@@ -60,10 +67,8 @@ G_M30928_IG05: ; bbWeight=0.50, gcrefRegs=0000 {}, byrefRegs=0000 {}, byr
ld fp, 8(sp)
addi sp, sp, 32
ret ;; size=20 bbWeight=0.50 PerfScore 4.75
-RWD00 dq 00002ACB341F29D4h
-
-; Total bytes of code 136, prolog size 20, PerfScore 46.75, instruction count 27, allocated bytes for code 136 (MethodHash=f0e4872f) for method System.OrdinalComparer:GetHashCode():int:this (FullOpts)
+; Total bytes of code 152, prolog size 20, PerfScore 50.25, instruction count 28, allocated bytes for code 152 (MethodHash=f0e4872f) for method System.OrdinalComparer:GetHashCode():int:this (FullOpts)
; ============================================================
Unwind Info:
@@ -74,7 +79,7 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 68 (0x00044) Actual length = 136 (0x000088)
+ Function Length : 76 (0x0004c) Actual length = 152 (0x000098)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)+8 (+8.33%) : 1546.dasm - System.Random+CompatSeedImpl:NextInt64():long:this (FullOpts)@@ -10,20 +10,27 @@
; V00 this [V00,T01] ( 3, 10 ) ref -> s1 this class-hnd single-def <System.Random+CompatSeedImpl>
; V01 loc0 [V01,T00] ( 3, 17 ) long -> a0
;# V02 OutArgs [V02 ] ( 1, 1 ) struct ( 0) [sp+0x00] do-not-enreg[XS] addr-exposed "OutgoingArgSpace" <Empty>
+; V03 cse0 [V03,T02] ( 2, 9 ) long -> s2 hoist "CSE #01: aggressive"
;
-; Lcl frame size = 8
-Frame info. #outsz=0; #framesz=32; lcl=8
+; Lcl frame size = 0
+Frame info. #outsz=0; #framesz=32; lcl=0
G_M45547_IG01: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, nogc <-- Prolog IG
addi sp, sp, -32
- sd fp, 8(sp)
- sd ra, 16(sp)
- sd s1, 24(sp)
- addi fp, sp, 8
+ sd fp, 0(sp)
+ sd ra, 8(sp)
+ sd s1, 16(sp)
+ sd s2, 24(sp)
+ mv fp, sp
mv s1, a0
; gcrRegs +[s1]
- ;; size=24 bbWeight=1 PerfScore 13.50
-G_M45547_IG02: ; bbWeight=8, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref, isz
+ ;; size=28 bbWeight=1 PerfScore 17.50
+G_M45547_IG02: ; bbWeight=1, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref
+ addiw s2, zero, 0xD1FFAB1E
+ slli s2, s2, 63
+ addi s2, s2, 0xD1FFAB1E
+ ;; size=12 bbWeight=1 PerfScore 3.00
+G_M45547_IG03: ; bbWeight=8, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byref, isz
mv a0, s1
; gcrRegs +[a0]
lui t6, 0xD1FFAB1E
@@ -35,20 +42,18 @@ G_M45547_IG02: ; bbWeight=8, gcrefRegs=0200 {s1}, byrefRegs=0000 {}, byre
jalr a1 // System.Random+CompatSeedImpl:NextUInt64():ulong:this
; gcrRegs -[a0]
srli a0, a0, 1
- addiw a1, zero, 0xD1FFAB1E
- slli a1, a1, 63
- addi a1, a1, 0xD1FFAB1E
- beq a0, a1, G_M45547_IG02
- ;; size=52 bbWeight=8 PerfScore 140.00
-G_M45547_IG03: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
+ beq a0, s2, G_M45547_IG03
+ ;; size=40 bbWeight=8 PerfScore 116.00
+G_M45547_IG04: ; bbWeight=1, gcrefRegs=0000 {}, byrefRegs=0000 {}, byref, epilog, nogc
; gcrRegs -[s1]
- ld s1, 24(sp)
- ld ra, 16(sp)
- ld fp, 8(sp)
+ ld s2, 24(sp)
+ ld s1, 16(sp)
+ ld ra, 8(sp)
+ ld fp, 0(sp)
addi sp, sp, 32
- ret ;; size=20 bbWeight=1 PerfScore 9.50
+ ret ;; size=24 bbWeight=1 PerfScore 11.50
-; Total bytes of code 96, prolog size 20, PerfScore 163.00, instruction count 18, allocated bytes for code 96 (MethodHash=f2254e14) for method System.Random+CompatSeedImpl:NextInt64():long:this (FullOpts)
+; Total bytes of code 104, prolog size 24, PerfScore 148.00, instruction count 20, allocated bytes for code 104 (MethodHash=f2254e14) for method System.Random+CompatSeedImpl:NextInt64():long:this (FullOpts)
; ============================================================
Unwind Info:
@@ -59,19 +64,19 @@ Unwind Info:
E bit : 0
X bit : 0
Vers : 0
- Function Length : 48 (0x00030) Actual length = 96 (0x000060)
+ Function Length : 52 (0x00034) Actual length = 104 (0x000068)
---- Epilog scopes ----
---- Scope 0
Epilog Start Offset : 3523193630 (0xd1ffab1e) Actual offset = 3523193630 (0xd1ffab1e) Offset from main function begin = 3523193630 (0xd1ffab1e)
- Epilog Start Index : 3 (0x03)
+ Epilog Start Index : 1 (0x01)
---- Unwind codes ----
- E2 00 01 add_fp 1 (0x01); addi fp, sp, #8
- ---- Epilog start at index 3 ----
- D0 08 03 save_reg X#8 Z#3 (0x03); sd s1, sp, 24
- D0 00 02 save_reg X#0 Z#2 (0x02); sd ra, sp, 16
- D0 07 01 save_reg X#7 Z#1 (0x01); sd fp, sp, 8
+ E1 set_fp; move fp, sp
+ ---- Epilog start at index 1 ----
+ D0 11 03 save_reg X#17 Z#3 (0x03); sd s2, sp, 24
+ D0 08 02 save_reg X#8 Z#2 (0x02); sd s1, sp, 16
+ D0 00 01 save_reg X#0 Z#1 (0x01); sd ra, sp, 8
+ D0 07 00 save_reg X#7 Z#0 (0x00); sd fp, sp, 0
02 alloc_s #2 (0x02); addi sp, sp, -32 (0x020)
E4 end
E4 end
- E4 end
DetailsSize improvements/regressions per collection
PerfScore improvements/regressions per collection
Context information
jit-analyze output |
The reason may be that when generating asm diffs, nearly all pointers end up out of range because we're in PMI replay. I remember having to force |
Constant CSE is not enabled by default on RISCV64.
Enabling constant CSE lets the JIT materialize a large, repeatedly used constant once and share the result among all of its uses, hoisting the load out of loops where possible.
Since generating a large constant requires at least 3 instructions on RISCV64, enabling constant CSE eliminates the redundant instruction sequences that would otherwise regenerate the same large constant at every use.
Part of #84834, cc @dotnet/samsung
@SkyShield @credo-quia-absurdum