forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MihaZupan] JIT: Optimize const ShiftRightLogical for byte values on XArch #88
Comments
Top method improvements
-632 (-88.889% of base) - System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte]
; Assembly listing for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
-; rbp based frame
-; fully interruptible
+; rsp based frame
+; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 48 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T17] ( 5, 5 ) byref -> rdi single-def
+; V00 RetBuf [V00,T00] ( 5, 5 ) byref -> rdi single-def
;* V01 arg0 [V01 ] ( 0, 0 ) struct (64) zero-ref single-def
-; V02 arg1 [V02,T18] ( 3, 3 ) int -> rsi single-def
+; V02 arg1 [V02,T02] ( 3, 3 ) int -> rsi single-def
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V04 tmp1 [V04,T33] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
-; V05 tmp2 [V05,T34] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
-; V06 tmp3 [V06,T27] ( 3, 6 ) simd32 -> [rbp-30H] do-not-enreg[SF] "Inlining Arg"
-; V07 tmp4 [V07,T35] ( 2, 4 ) simd16 -> mm0 "impAppendStmt"
-;* V08 tmp5 [V08 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V09 tmp6 [V09,T28] ( 3, 6 ) simd16 -> [rbp-40H] do-not-enreg[SF] "Inlining Arg"
-;* V10 tmp7 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V11 tmp8 [V11 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V12 tmp9 [V12 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V13 tmp10 [V13 ] ( 2, 5 ) struct ( 8) [rbp-48H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V14 tmp11 [V14,T01] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V15 tmp12 [V15 ] ( 2, 10 ) struct ( 8) [rbp-50H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V16 tmp13 [V16 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V17 tmp14 [V17 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V18 tmp15 [V18 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V19 tmp16 [V19,T09] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V20 tmp17 [V20 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V21 tmp18 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V22 tmp19 [V22 ] ( 2, 5 ) struct ( 8) [rbp-58H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V23 tmp20 [V23,T02] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V24 tmp21 [V24 ] ( 2, 10 ) struct ( 8) [rbp-60H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V25 tmp22 [V25 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V26 tmp23 [V26 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V27 tmp24 [V27 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V28 tmp25 [V28,T10] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V29 tmp26 [V29 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V30 tmp27 [V30,T37] ( 3, 3 ) simd16 -> [rbp-70H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V31 tmp28 [V31,T29] ( 3, 6 ) simd16 -> [rbp-80H] do-not-enreg[SF] "Inlining Arg"
-;* V32 tmp29 [V32 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V33 tmp30 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V34 tmp31 [V34 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V35 tmp32 [V35 ] ( 2, 5 ) struct ( 8) [rbp-88H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V36 tmp33 [V36,T03] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V37 tmp34 [V37 ] ( 2, 10 ) struct ( 8) [rbp-90H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V38 tmp35 [V38 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V39 tmp36 [V39 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V40 tmp37 [V40 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V41 tmp38 [V41,T11] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V42 tmp39 [V42 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V43 tmp40 [V43 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V44 tmp41 [V44 ] ( 2, 5 ) struct ( 8) [rbp-98H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V45 tmp42 [V45,T04] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V46 tmp43 [V46 ] ( 2, 10 ) struct ( 8) [rbp-A0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V47 tmp44 [V47 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V48 tmp45 [V48 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V49 tmp46 [V49 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V50 tmp47 [V50,T12] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V51 tmp48 [V51 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V52 tmp49 [V52,T38] ( 3, 3 ) simd16 -> [rbp-B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V53 tmp50 [V53 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V54 tmp51 [V54 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V55 tmp52 [V55,T30] ( 3, 6 ) simd32 -> [rbp-D0H] do-not-enreg[SF] "Inlining Arg"
-; V56 tmp53 [V56,T36] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
-;* V57 tmp54 [V57 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V58 tmp55 [V58,T31] ( 3, 6 ) simd16 -> [rbp-E0H] do-not-enreg[SF] "Inlining Arg"
-;* V59 tmp56 [V59 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V60 tmp57 [V60 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V61 tmp58 [V61 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V62 tmp59 [V62 ] ( 2, 5 ) struct ( 8) [rbp-E8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V63 tmp60 [V63,T05] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V64 tmp61 [V64 ] ( 2, 10 ) struct ( 8) [rbp-F0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V65 tmp62 [V65 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V66 tmp63 [V66 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V67 tmp64 [V67 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V68 tmp65 [V68,T13] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V69 tmp66 [V69 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V70 tmp67 [V70 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V71 tmp68 [V71 ] ( 2, 5 ) struct ( 8) [rbp-F8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V72 tmp69 [V72,T06] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V73 tmp70 [V73 ] ( 2, 10 ) struct ( 8) [rbp-100H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V74 tmp71 [V74 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V75 tmp72 [V75 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V76 tmp73 [V76 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V77 tmp74 [V77,T14] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V78 tmp75 [V78 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V79 tmp76 [V79,T39] ( 3, 3 ) simd16 -> [rbp-110H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V80 tmp77 [V80,T32] ( 3, 6 ) simd16 -> [rbp-120H] do-not-enreg[SF] "Inlining Arg"
-;* V81 tmp78 [V81 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V82 tmp79 [V82 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V83 tmp80 [V83 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V84 tmp81 [V84 ] ( 2, 5 ) struct ( 8) [rbp-128H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V85 tmp82 [V85,T07] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V86 tmp83 [V86 ] ( 2, 10 ) struct ( 8) [rbp-130H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V87 tmp84 [V87 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V88 tmp85 [V88 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V89 tmp86 [V89 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V90 tmp87 [V90,T15] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V91 tmp88 [V91 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V92 tmp89 [V92 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V93 tmp90 [V93 ] ( 2, 5 ) struct ( 8) [rbp-138H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V94 tmp91 [V94,T08] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V95 tmp92 [V95 ] ( 2, 10 ) struct ( 8) [rbp-140H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V96 tmp93 [V96 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V97 tmp94 [V97 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V98 tmp95 [V98 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V99 tmp96 [V99,T16] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V100 tmp97 [V100 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V101 tmp98 [V101,T40] ( 3, 3 ) simd16 -> [rbp-150H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V102 tmp99 [V102 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V103 tmp100 [V103 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-;* V104 tmp101 [V104 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp"
-; V105 tmp102 [V105,T43] ( 1, 1 ) simd32 -> [rbp+10H] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-; V106 tmp103 [V106,T44] ( 1, 1 ) simd32 -> [rbp+30H] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-; V107 tmp104 [V107,T19] ( 2, 2 ) long -> rax "field V10._00 (fldOffset=0x0)" P-INDEP
-; V108 tmp105 [V108,T20] ( 2, 2 ) long -> rcx "field V11._00 (fldOffset=0x0)" P-INDEP
-;* V109 tmp106 [V109 ] ( 0, 0 ) long -> zero-ref "field V12._00 (fldOffset=0x0)" P-INDEP
-; V110 tmp107 [V110 ] ( 2, 5 ) long -> [rbp-48H] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-; V111 tmp108 [V111 ] ( 2, 9 ) long -> [rbp-50H] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;* V112 tmp109 [V112 ] ( 0, 0 ) long -> zero-ref "field V21._00 (fldOffset=0x0)" P-INDEP
-; V113 tmp110 [V113 ] ( 2, 5 ) long -> [rbp-58H] do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-; V114 tmp111 [V114 ] ( 2, 9 ) long -> [rbp-60H] do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-; V115 tmp112 [V115,T21] ( 2, 2 ) long -> rax "field V32._00 (fldOffset=0x0)" P-INDEP
-; V116 tmp113 [V116,T22] ( 2, 2 ) long -> rcx "field V33._00 (fldOffset=0x0)" P-INDEP
-;* V117 tmp114 [V117 ] ( 0, 0 ) long -> zero-ref "field V34._00 (fldOffset=0x0)" P-INDEP
-; V118 tmp115 [V118 ] ( 2, 5 ) long -> [rbp-88H] do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-; V119 tmp116 [V119 ] ( 2, 9 ) long -> [rbp-90H] do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;* V120 tmp117 [V120 ] ( 0, 0 ) long -> zero-ref "field V43._00 (fldOffset=0x0)" P-INDEP
-; V121 tmp118 [V121 ] ( 2, 5 ) long -> [rbp-98H] do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-; V122 tmp119 [V122 ] ( 2, 9 ) long -> [rbp-A0H] do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-; V123 tmp120 [V123,T23] ( 2, 2 ) long -> rax "field V59._00 (fldOffset=0x0)" P-INDEP
-; V124 tmp121 [V124,T24] ( 2, 2 ) long -> rcx "field V60._00 (fldOffset=0x0)" P-INDEP
-;* V125 tmp122 [V125 ] ( 0, 0 ) long -> zero-ref "field V61._00 (fldOffset=0x0)" P-INDEP
-; V126 tmp123 [V126 ] ( 2, 5 ) long -> [rbp-E8H] do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
-; V127 tmp124 [V127 ] ( 2, 9 ) long -> [rbp-F0H] do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;* V128 tmp125 [V128 ] ( 0, 0 ) long -> zero-ref "field V70._00 (fldOffset=0x0)" P-INDEP
-; V129 tmp126 [V129 ] ( 2, 5 ) long -> [rbp-F8H] do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-; V130 tmp127 [V130 ] ( 2, 9 ) long -> [rbp-100H] do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-; V131 tmp128 [V131,T25] ( 2, 2 ) long -> rax "field V81._00 (fldOffset=0x0)" P-INDEP
-; V132 tmp129 [V132,T26] ( 2, 2 ) long -> rcx "field V82._00 (fldOffset=0x0)" P-INDEP
-;* V133 tmp130 [V133 ] ( 0, 0 ) long -> zero-ref "field V83._00 (fldOffset=0x0)" P-INDEP
-; V134 tmp131 [V134 ] ( 2, 5 ) long -> [rbp-128H] do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-; V135 tmp132 [V135 ] ( 2, 9 ) long -> [rbp-130H] do-not-enreg[X] addr-exposed "field V86._00 (fldOffset=0x0)" P-DEP
-;* V136 tmp133 [V136 ] ( 0, 0 ) long -> zero-ref "field V92._00 (fldOffset=0x0)" P-INDEP
-; V137 tmp134 [V137 ] ( 2, 5 ) long -> [rbp-138H] do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-; V138 tmp135 [V138 ] ( 2, 9 ) long -> [rbp-140H] do-not-enreg[X] addr-exposed "field V95._00 (fldOffset=0x0)" P-DEP
-; V139 tmp136 [V139,T41] ( 2, 2 ) simd32 -> mm0 "field V104._lower (fldOffset=0x0)" P-INDEP
-; V140 tmp137 [V140,T42] ( 2, 2 ) simd32 -> mm1 "field V104._upper (fldOffset=0x20)" P-INDEP
-; V141 cse0 [V141,T00] ( 9, 33 ) int -> rsi "CSE - aggressive"
+; V04 tmp1 [V04,T01] ( 3, 6 ) int -> rax "fgMakeTemp is creating a new local variable"
+; V05 tmp2 [V05,T03] ( 2, 4 ) int -> rsi "fgMakeTemp is creating a new local variable"
+; V06 tmp3 [V06,T05] ( 2, 4 ) simd32 -> mm0 "Inlining Arg"
+; V07 tmp4 [V07,T06] ( 2, 4 ) simd32 -> mm1 "Inlining Arg"
+;* V08 tmp5 [V08 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+; V09 tmp6 [V09,T10] ( 1, 1 ) simd32 -> [rsp+08H] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+; V10 tmp7 [V10,T11] ( 1, 1 ) simd32 -> [rsp+28H] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+; V11 tmp8 [V11,T08] ( 2, 2 ) simd32 -> mm0 "field V08._lower (fldOffset=0x0)" P-INDEP
+; V12 tmp9 [V12,T09] ( 2, 2 ) simd32 -> mm1 "field V08._upper (fldOffset=0x20)" P-INDEP
+; V13 cse0 [V13,T07] ( 3, 3 ) simd32 -> mm1 "CSE - aggressive"
+; V14 cse1 [V14,T04] ( 3, 3 ) int -> rsi "CSE - aggressive"
;
-; Lcl frame size = 336
+; Lcl frame size = 0
G_M22707_IG01:
- push rbp
- sub rsp, 336
vzeroupper
- lea rbp, [rsp+150H]
- ;; size=19 bbWeight=1 PerfScore 2.75
+ ;; size=3 bbWeight=1 PerfScore 1.00
G_M22707_IG02:
- vmovups ymm0, ymmword ptr [rbp+10H]
- vmovups ymmword ptr [rbp-30H], ymm0
- vmovups xmm0, xmmword ptr [rbp-30H]
- vmovaps xmmword ptr [rbp-40H], xmm0
- mov rax, qword ptr [rbp-40H]
- mov qword ptr [rbp-50H], rax
- xor eax, eax
and esi, 7
- align [0 bytes for IG03]
- ;; size=33 bbWeight=1 PerfScore 11.50
-G_M22707_IG03:
- lea rcx, bword ptr [rbp-50H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-48H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG03
- ;; size=38 bbWeight=4 PerfScore 27.00
-G_M22707_IG04:
- mov rax, qword ptr [rbp-48H]
- mov rcx, qword ptr [rbp-38H]
- mov qword ptr [rbp-60H], rcx
- xor ecx, ecx
- align [0 bytes for IG05]
- ;; size=14 bbWeight=1 PerfScore 3.25
-G_M22707_IG05:
- lea rdx, bword ptr [rbp-60H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-58H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG05
- ;; size=39 bbWeight=4 PerfScore 27.00
-G_M22707_IG06:
- mov rcx, qword ptr [rbp-58H]
- mov qword ptr [rbp-70H], rax
- mov qword ptr [rbp-68H], rcx
- vmovaps xmm0, xmmword ptr [rbp-70H]
- vmovups xmm1, xmmword ptr [rbp-20H]
- vmovaps xmmword ptr [rbp-80H], xmm1
- mov rax, qword ptr [rbp-80H]
- mov qword ptr [rbp-90H], rax
- xor eax, eax
- align [0 bytes for IG07]
- ;; size=40 bbWeight=1 PerfScore 12.25
-G_M22707_IG07:
- lea rcx, bword ptr [rbp-90H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-88H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG07
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG08:
- mov rax, qword ptr [rbp-88H]
- mov rcx, qword ptr [rbp-78H]
- mov qword ptr [rbp-A0H], rcx
- xor ecx, ecx
- align [0 bytes for IG09]
- ;; size=20 bbWeight=1 PerfScore 3.25
-G_M22707_IG09:
- lea rdx, bword ptr [rbp-A0H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-98H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG09
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG10:
- mov rcx, qword ptr [rbp-98H]
- mov qword ptr [rbp-B0H], rax
- mov qword ptr [rbp-A8H], rcx
- vinserti128 ymm0, ymm0, xmmword ptr [rbp-B0H], 1
- vmovups ymm1, ymmword ptr [rbp+30H]
- vmovups ymmword ptr [rbp-D0H], ymm1
- vmovups xmm1, xmmword ptr [rbp-D0H]
- vmovaps xmmword ptr [rbp-E0H], xmm1
- mov rax, qword ptr [rbp-E0H]
- mov qword ptr [rbp-F0H], rax
- xor eax, eax
- align [0 bytes for IG11]
- ;; size=76 bbWeight=1 PerfScore 18.25
-G_M22707_IG11:
- lea rcx, bword ptr [rbp-F0H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-E8H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG11
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG12:
- mov rax, qword ptr [rbp-E8H]
- mov rcx, qword ptr [rbp-D8H]
- mov qword ptr [rbp-100H], rcx
- xor ecx, ecx
- align [0 bytes for IG13]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M22707_IG13:
- lea rdx, bword ptr [rbp-100H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-F8H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG13
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG14:
- mov rcx, qword ptr [rbp-F8H]
- mov qword ptr [rbp-110H], rax
- mov qword ptr [rbp-108H], rcx
- vmovaps xmm1, xmmword ptr [rbp-110H]
- vmovups xmm2, xmmword ptr [rbp-C0H]
- vmovaps xmmword ptr [rbp-120H], xmm2
- mov rax, qword ptr [rbp-120H]
- mov qword ptr [rbp-130H], rax
- xor eax, eax
- align [3 bytes for IG15]
- ;; size=64 bbWeight=1 PerfScore 12.50
-G_M22707_IG15:
- lea rcx, bword ptr [rbp-130H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-128H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG15
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG16:
- mov rax, qword ptr [rbp-128H]
- mov rcx, qword ptr [rbp-118H]
- mov qword ptr [rbp-140H], rcx
- xor ecx, ecx
- align [0 bytes for IG17]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M22707_IG17:
- lea rdx, bword ptr [rbp-140H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-138H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG17
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG18:
- mov rcx, qword ptr [rbp-138H]
- mov qword ptr [rbp-150H], rax
- mov qword ptr [rbp-148H], rcx
- vinserti128 ymm1, ymm1, xmmword ptr [rbp-150H], 1
+ mov eax, esi
+ vmovd xmm0, rax
+ vmovups ymm1, ymmword ptr [rsp+08H]
+ vpsrld ymm0, ymm1, ymm0
+ mov ecx, 255
+ shrx eax, ecx, eax
+ vmovd xmm1, eax
+ vpbroadcastb ymm1, ymm1
+ vpand ymm0, ymm0, ymm1
+ vmovd xmm2, rsi
+ vmovups ymm3, ymmword ptr [rsp+28H]
+ vpsrld ymm2, ymm3, ymm2
+ vpand ymm1, ymm2, ymm1
vmovups ymmword ptr [rdi], ymm0
vmovups ymmword ptr [rdi+20H], ymm1
mov rax, rdi
- ;; size=43 bbWeight=1 PerfScore 11.25
-G_M22707_IG19:
+ ;; size=72 bbWeight=1 PerfScore 28.17
+G_M22707_IG03:
vzeroupper
- add rsp, 336
- pop rbp
ret
- ;; size=12 bbWeight=1 PerfScore 2.75
+ ;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 711, prolog size 19, PerfScore 371.35, instruction count 164, allocated bytes for code 711 (MethodHash=eab6a74c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)
+; Total bytes of code 79, prolog size 3, PerfScore 39.07, instruction count 20, allocated bytes for code 79 (MethodHash=eab6a74c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)
-628 (-88.826% of base) - System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte]
; Assembly listing for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX - Unix
; FullOpts code
; optimized code
-; rbp based frame
-; fully interruptible
+; rsp based frame
+; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 48 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T17] ( 5, 5 ) byref -> rdi single-def
+; V00 RetBuf [V00,T00] ( 5, 5 ) byref -> rdi single-def
;* V01 arg0 [V01 ] ( 0, 0 ) struct (64) zero-ref single-def
-; V02 arg1 [V02,T18] ( 3, 3 ) int -> rsi single-def
+; V02 arg1 [V02,T02] ( 3, 3 ) int -> rsi single-def
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V04 tmp1 [V04,T33] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
-; V05 tmp2 [V05,T34] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
-; V06 tmp3 [V06,T27] ( 3, 6 ) simd32 -> [rbp-30H] do-not-enreg[SF] "Inlining Arg"
-; V07 tmp4 [V07,T35] ( 2, 4 ) simd16 -> mm0 "impAppendStmt"
-;* V08 tmp5 [V08 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V09 tmp6 [V09,T28] ( 3, 6 ) simd16 -> [rbp-40H] do-not-enreg[SF] "Inlining Arg"
-;* V10 tmp7 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V11 tmp8 [V11 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V12 tmp9 [V12 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V13 tmp10 [V13 ] ( 2, 5 ) struct ( 8) [rbp-48H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V14 tmp11 [V14,T01] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V15 tmp12 [V15 ] ( 2, 10 ) struct ( 8) [rbp-50H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V16 tmp13 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V17 tmp14 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V18 tmp15 [V18 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V19 tmp16 [V19,T09] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V20 tmp17 [V20 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V21 tmp18 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V22 tmp19 [V22 ] ( 2, 5 ) struct ( 8) [rbp-58H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V23 tmp20 [V23,T02] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V24 tmp21 [V24 ] ( 2, 10 ) struct ( 8) [rbp-60H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V25 tmp22 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V26 tmp23 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V27 tmp24 [V27 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V28 tmp25 [V28,T10] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V29 tmp26 [V29 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V30 tmp27 [V30,T37] ( 3, 3 ) simd16 -> [rbp-70H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V31 tmp28 [V31,T29] ( 3, 6 ) simd16 -> [rbp-80H] do-not-enreg[SF] "Inlining Arg"
-;* V32 tmp29 [V32 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V33 tmp30 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V34 tmp31 [V34 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V35 tmp32 [V35 ] ( 2, 5 ) struct ( 8) [rbp-88H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V36 tmp33 [V36,T03] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V37 tmp34 [V37 ] ( 2, 10 ) struct ( 8) [rbp-90H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V38 tmp35 [V38 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V39 tmp36 [V39 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V40 tmp37 [V40 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V41 tmp38 [V41,T11] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V42 tmp39 [V42 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V43 tmp40 [V43 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V44 tmp41 [V44 ] ( 2, 5 ) struct ( 8) [rbp-98H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V45 tmp42 [V45,T04] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V46 tmp43 [V46 ] ( 2, 10 ) struct ( 8) [rbp-A0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V47 tmp44 [V47 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V48 tmp45 [V48 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V49 tmp46 [V49 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V50 tmp47 [V50,T12] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V51 tmp48 [V51 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V52 tmp49 [V52,T38] ( 3, 3 ) simd16 -> [rbp-B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V53 tmp50 [V53 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V54 tmp51 [V54 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V55 tmp52 [V55,T30] ( 3, 6 ) simd32 -> [rbp-D0H] do-not-enreg[SF] "Inlining Arg"
-; V56 tmp53 [V56,T36] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
-;* V57 tmp54 [V57 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V58 tmp55 [V58,T31] ( 3, 6 ) simd16 -> [rbp-E0H] do-not-enreg[SF] "Inlining Arg"
-;* V59 tmp56 [V59 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V60 tmp57 [V60 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V61 tmp58 [V61 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V62 tmp59 [V62 ] ( 2, 5 ) struct ( 8) [rbp-E8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V63 tmp60 [V63,T05] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V64 tmp61 [V64 ] ( 2, 10 ) struct ( 8) [rbp-F0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V65 tmp62 [V65 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V66 tmp63 [V66 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V67 tmp64 [V67 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V68 tmp65 [V68,T13] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V69 tmp66 [V69 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V70 tmp67 [V70 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V71 tmp68 [V71 ] ( 2, 5 ) struct ( 8) [rbp-F8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V72 tmp69 [V72,T06] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V73 tmp70 [V73 ] ( 2, 10 ) struct ( 8) [rbp-100H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V74 tmp71 [V74 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V75 tmp72 [V75 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V76 tmp73 [V76 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V77 tmp74 [V77,T14] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V78 tmp75 [V78 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V79 tmp76 [V79,T39] ( 3, 3 ) simd16 -> [rbp-110H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V80 tmp77 [V80,T32] ( 3, 6 ) simd16 -> [rbp-120H] do-not-enreg[SF] "Inlining Arg"
-;* V81 tmp78 [V81 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V82 tmp79 [V82 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V83 tmp80 [V83 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V84 tmp81 [V84 ] ( 2, 5 ) struct ( 8) [rbp-128H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V85 tmp82 [V85,T07] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V86 tmp83 [V86 ] ( 2, 10 ) struct ( 8) [rbp-130H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V87 tmp84 [V87 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V88 tmp85 [V88 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V89 tmp86 [V89 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V90 tmp87 [V90,T15] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V91 tmp88 [V91 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V92 tmp89 [V92 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V93 tmp90 [V93 ] ( 2, 5 ) struct ( 8) [rbp-138H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V94 tmp91 [V94,T08] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V95 tmp92 [V95 ] ( 2, 10 ) struct ( 8) [rbp-140H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V96 tmp93 [V96 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V97 tmp94 [V97 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V98 tmp95 [V98 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V99 tmp96 [V99,T16] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V100 tmp97 [V100 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V101 tmp98 [V101,T40] ( 3, 3 ) simd16 -> [rbp-150H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V102 tmp99 [V102 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V103 tmp100 [V103 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-;* V104 tmp101 [V104 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp"
-; V105 tmp102 [V105,T43] ( 1, 1 ) simd32 -> [rbp+10H] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
-; V106 tmp103 [V106,T44] ( 1, 1 ) simd32 -> [rbp+30H] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
-; V107 tmp104 [V107,T19] ( 2, 2 ) long -> rax "field V10._00 (fldOffset=0x0)" P-INDEP
-; V108 tmp105 [V108,T20] ( 2, 2 ) long -> rcx "field V11._00 (fldOffset=0x0)" P-INDEP
-;* V109 tmp106 [V109 ] ( 0, 0 ) long -> zero-ref "field V12._00 (fldOffset=0x0)" P-INDEP
-; V110 tmp107 [V110 ] ( 2, 5 ) long -> [rbp-48H] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-; V111 tmp108 [V111 ] ( 2, 9 ) long -> [rbp-50H] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;* V112 tmp109 [V112 ] ( 0, 0 ) long -> zero-ref "field V21._00 (fldOffset=0x0)" P-INDEP
-; V113 tmp110 [V113 ] ( 2, 5 ) long -> [rbp-58H] do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-; V114 tmp111 [V114 ] ( 2, 9 ) long -> [rbp-60H] do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-; V115 tmp112 [V115,T21] ( 2, 2 ) long -> rax "field V32._00 (fldOffset=0x0)" P-INDEP
-; V116 tmp113 [V116,T22] ( 2, 2 ) long -> rcx "field V33._00 (fldOffset=0x0)" P-INDEP
-;* V117 tmp114 [V117 ] ( 0, 0 ) long -> zero-ref "field V34._00 (fldOffset=0x0)" P-INDEP
-; V118 tmp115 [V118 ] ( 2, 5 ) long -> [rbp-88H] do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-; V119 tmp116 [V119 ] ( 2, 9 ) long -> [rbp-90H] do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;* V120 tmp117 [V120 ] ( 0, 0 ) long -> zero-ref "field V43._00 (fldOffset=0x0)" P-INDEP
-; V121 tmp118 [V121 ] ( 2, 5 ) long -> [rbp-98H] do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-; V122 tmp119 [V122 ] ( 2, 9 ) long -> [rbp-A0H] do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-; V123 tmp120 [V123,T23] ( 2, 2 ) long -> rax "field V59._00 (fldOffset=0x0)" P-INDEP
-; V124 tmp121 [V124,T24] ( 2, 2 ) long -> rcx "field V60._00 (fldOffset=0x0)" P-INDEP
-;* V125 tmp122 [V125 ] ( 0, 0 ) long -> zero-ref "field V61._00 (fldOffset=0x0)" P-INDEP
-; V126 tmp123 [V126 ] ( 2, 5 ) long -> [rbp-E8H] do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
-; V127 tmp124 [V127 ] ( 2, 9 ) long -> [rbp-F0H] do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;* V128 tmp125 [V128 ] ( 0, 0 ) long -> zero-ref "field V70._00 (fldOffset=0x0)" P-INDEP
-; V129 tmp126 [V129 ] ( 2, 5 ) long -> [rbp-F8H] do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-; V130 tmp127 [V130 ] ( 2, 9 ) long -> [rbp-100H] do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-; V131 tmp128 [V131,T25] ( 2, 2 ) long -> rax "field V81._00 (fldOffset=0x0)" P-INDEP
-; V132 tmp129 [V132,T26] ( 2, 2 ) long -> rcx "field V82._00 (fldOffset=0x0)" P-INDEP
-;* V133 tmp130 [V133 ] ( 0, 0 ) long -> zero-ref "field V83._00 (fldOffset=0x0)" P-INDEP
-; V134 tmp131 [V134 ] ( 2, 5 ) long -> [rbp-128H] do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-; V135 tmp132 [V135 ] ( 2, 9 ) long -> [rbp-130H] do-not-enreg[X] addr-exposed "field V86._00 (fldOffset=0x0)" P-DEP
-;* V136 tmp133 [V136 ] ( 0, 0 ) long -> zero-ref "field V92._00 (fldOffset=0x0)" P-INDEP
-; V137 tmp134 [V137 ] ( 2, 5 ) long -> [rbp-138H] do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-; V138 tmp135 [V138 ] ( 2, 9 ) long -> [rbp-140H] do-not-enreg[X] addr-exposed "field V95._00 (fldOffset=0x0)" P-DEP
-; V139 tmp136 [V139,T41] ( 2, 2 ) simd32 -> mm0 "field V104._lower (fldOffset=0x0)" P-INDEP
-; V140 tmp137 [V140,T42] ( 2, 2 ) simd32 -> mm1 "field V104._upper (fldOffset=0x20)" P-INDEP
-; V141 cse0 [V141,T00] ( 9, 33 ) int -> rsi "CSE - aggressive"
+; V04 tmp1 [V04,T01] ( 3, 6 ) int -> rax "fgMakeTemp is creating a new local variable"
+; V05 tmp2 [V05,T03] ( 2, 4 ) int -> rsi "fgMakeTemp is creating a new local variable"
+; V06 tmp3 [V06,T05] ( 2, 4 ) simd32 -> mm0 "Inlining Arg"
+; V07 tmp4 [V07,T06] ( 2, 4 ) simd32 -> mm1 "Inlining Arg"
+;* V08 tmp5 [V08 ] ( 0, 0 ) struct (64) zero-ref ld-addr-op "Inline ldloca(s) first use temp"
+; V09 tmp6 [V09,T10] ( 1, 1 ) simd32 -> [rsp+08H] single-def "field V01._lower (fldOffset=0x0)" P-INDEP
+; V10 tmp7 [V10,T11] ( 1, 1 ) simd32 -> [rsp+28H] single-def "field V01._upper (fldOffset=0x20)" P-INDEP
+; V11 tmp8 [V11,T08] ( 2, 2 ) simd32 -> mm0 "field V08._lower (fldOffset=0x0)" P-INDEP
+; V12 tmp9 [V12,T09] ( 2, 2 ) simd32 -> mm1 "field V08._upper (fldOffset=0x20)" P-INDEP
+; V13 cse0 [V13,T07] ( 3, 3 ) simd32 -> mm1 "CSE - aggressive"
+; V14 cse1 [V14,T04] ( 3, 3 ) int -> rsi "CSE - aggressive"
;
-; Lcl frame size = 336
+; Lcl frame size = 0
G_M33171_IG01:
- push rbp
- sub rsp, 336
vzeroupper
- lea rbp, [rsp+150H]
- ;; size=19 bbWeight=1 PerfScore 2.75
+ ;; size=3 bbWeight=1 PerfScore 1.00
G_M33171_IG02:
- vmovups ymm0, ymmword ptr [rbp+10H]
- vmovups ymmword ptr [rbp-30H], ymm0
- vmovups xmm0, xmmword ptr [rbp-30H]
- vmovaps xmmword ptr [rbp-40H], xmm0
- mov rax, qword ptr [rbp-40H]
- mov qword ptr [rbp-50H], rax
- xor eax, eax
and esi, 7
- align [0 bytes for IG03]
- ;; size=33 bbWeight=1 PerfScore 11.50
-G_M33171_IG03:
- lea rcx, bword ptr [rbp-50H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-48H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG03
- ;; size=37 bbWeight=4 PerfScore 27.00
-G_M33171_IG04:
- mov rax, qword ptr [rbp-48H]
- mov rcx, qword ptr [rbp-38H]
- mov qword ptr [rbp-60H], rcx
- xor ecx, ecx
- align [0 bytes for IG05]
- ;; size=14 bbWeight=1 PerfScore 3.25
-G_M33171_IG05:
- lea rdx, bword ptr [rbp-60H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-58H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG05
- ;; size=38 bbWeight=4 PerfScore 27.00
-G_M33171_IG06:
- mov rcx, qword ptr [rbp-58H]
- mov qword ptr [rbp-70H], rax
- mov qword ptr [rbp-68H], rcx
- vmovaps xmm0, xmmword ptr [rbp-70H]
- vmovups xmm1, xmmword ptr [rbp-20H]
- vmovaps xmmword ptr [rbp-80H], xmm1
- mov rax, qword ptr [rbp-80H]
- mov qword ptr [rbp-90H], rax
- xor eax, eax
- align [0 bytes for IG07]
- ;; size=40 bbWeight=1 PerfScore 12.25
-G_M33171_IG07:
- lea rcx, bword ptr [rbp-90H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-88H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG07
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG08:
- mov rax, qword ptr [rbp-88H]
- mov rcx, qword ptr [rbp-78H]
- mov qword ptr [rbp-A0H], rcx
- xor ecx, ecx
- align [0 bytes for IG09]
- ;; size=20 bbWeight=1 PerfScore 3.25
-G_M33171_IG09:
- lea rdx, bword ptr [rbp-A0H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-98H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG09
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG10:
- mov rcx, qword ptr [rbp-98H]
- mov qword ptr [rbp-B0H], rax
- mov qword ptr [rbp-A8H], rcx
- vinserti128 ymm0, ymm0, xmmword ptr [rbp-B0H], 1
- vmovups ymm1, ymmword ptr [rbp+30H]
- vmovups ymmword ptr [rbp-D0H], ymm1
- vmovups xmm1, xmmword ptr [rbp-D0H]
- vmovaps xmmword ptr [rbp-E0H], xmm1
- mov rax, qword ptr [rbp-E0H]
- mov qword ptr [rbp-F0H], rax
- xor eax, eax
- align [0 bytes for IG11]
- ;; size=76 bbWeight=1 PerfScore 18.25
-G_M33171_IG11:
- lea rcx, bword ptr [rbp-F0H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-E8H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG11
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG12:
- mov rax, qword ptr [rbp-E8H]
- mov rcx, qword ptr [rbp-D8H]
- mov qword ptr [rbp-100H], rcx
- xor ecx, ecx
- align [0 bytes for IG13]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M33171_IG13:
- lea rdx, bword ptr [rbp-100H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-F8H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG13
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG14:
- mov rcx, qword ptr [rbp-F8H]
- mov qword ptr [rbp-110H], rax
- mov qword ptr [rbp-108H], rcx
- vmovaps xmm1, xmmword ptr [rbp-110H]
- vmovups xmm2, xmmword ptr [rbp-C0H]
- vmovaps xmmword ptr [rbp-120H], xmm2
- mov rax, qword ptr [rbp-120H]
- mov qword ptr [rbp-130H], rax
- xor eax, eax
- align [0 bytes for IG15]
- ;; size=61 bbWeight=1 PerfScore 12.25
-G_M33171_IG15:
- lea rcx, bword ptr [rbp-130H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-128H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG15
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG16:
- mov rax, qword ptr [rbp-128H]
- mov rcx, qword ptr [rbp-118H]
- mov qword ptr [rbp-140H], rcx
- xor ecx, ecx
- align [7 bytes for IG17]
- ;; size=30 bbWeight=1 PerfScore 3.50
-G_M33171_IG17:
- lea rdx, bword ptr [rbp-140H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-138H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG17
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG18:
- mov rcx, qword ptr [rbp-138H]
- mov qword ptr [rbp-150H], rax
- mov qword ptr [rbp-148H], rcx
- vinserti128 ymm1, ymm1, xmmword ptr [rbp-150H], 1
+ mov eax, esi
+ vmovd xmm0, rax
+ vmovups ymm1, ymmword ptr [rsp+08H]
+ vpsrld ymm0, ymm1, ymm0
+ mov ecx, 255
+ shrx eax, ecx, eax
+ vmovd xmm1, eax
+ vpbroadcastb ymm1, ymm1
+ vpand ymm0, ymm0, ymm1
+ vmovd xmm2, rsi
+ vmovups ymm3, ymmword ptr [rsp+28H]
+ vpsrld ymm2, ymm3, ymm2
+ vpand ymm1, ymm2, ymm1
vmovups ymmword ptr [rdi], ymm0
vmovups ymmword ptr [rdi+20H], ymm1
mov rax, rdi
- ;; size=43 bbWeight=1 PerfScore 11.25
-G_M33171_IG19:
+ ;; size=72 bbWeight=1 PerfScore 28.17
+G_M33171_IG03:
vzeroupper
- add rsp, 336
- pop rbp
ret
- ;; size=12 bbWeight=1 PerfScore 2.75
+ ;; size=4 bbWeight=1 PerfScore 2.00
-; Total bytes of code 707, prolog size 19, PerfScore 370.95, instruction count 164, allocated bytes for code 707 (MethodHash=ecb57e6c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
+; Total bytes of code 79, prolog size 3, PerfScore 39.07, instruction count 20, allocated bytes for code 79 (MethodHash=ecb57e6c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
Full list of diffs: https://gist.github.com/MihuBot/678ae8aad2b3f5827a506ad588309da0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Build completed in 50 minutes.
dotnet/runtime#86841
CoreLib diffs
Frameworks diffs
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: