forked from MihaZupan/runtime-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MihaZupan] JIT: Optimize const ShiftRightLogical for byte values on XArch #85
Comments
Top method regressions
16 (1.56 % of base) - System.Buffers.ProbabilisticMap:IndexOfAnyVectorized(byref,byref,int,System.ReadOnlySpan`1[ushort]):int
; Assembly listing for method System.Buffers.ProbabilisticMap:IndexOfAnyVectorized(byref,byref,int,System.ReadOnlySpan`1[ushort]):int (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; fully interruptible
; No PGO data
; 0 inlinees with PGO data; 15 single block inlinees; 11 inlinees without PGO data
; Final local variable assignments
;
; V00 arg0 [V00,T10] ( 4, 4 ) byref -> rdi single-def
; V01 arg1 [V01,T09] ( 6, 5 ) byref -> r14 single-def
; V02 arg2 [V02,T11] ( 4, 4 ) int -> rdx single-def
;* V03 arg3 [V03 ] ( 0, 0 ) struct (16) zero-ref multireg-arg single-def
; V04 loc0 [V04,T14] ( 7, 4 ) byref -> r13 single-def
; V05 loc1 [V05,T00] ( 19, 76 ) byref -> r12
-; V06 loc2 [V06,T38] ( 5, 10 ) simd16 -> [rbp-40H] spill-single-def
-; V07 loc3 [V07,T39] ( 5, 10 ) simd16 -> [rbp-50H] spill-single-def
+; V06 loc2 [V06,T40] ( 5, 10 ) simd16 -> [rbp-40H] spill-single-def
+; V07 loc3 [V07,T41] ( 5, 10 ) simd16 -> [rbp-50H] spill-single-def
; V08 loc4 [V08,T12] ( 3, 5 ) byref -> [rbp-A0H] spill-single-def
-; V09 loc5 [V09,T40] ( 3, 8.50) simd32 -> [rbp-70H] spill-single-def
-; V10 loc6 [V10,T41] ( 3, 8.50) simd32 -> [rbp-90H] spill-single-def
+; V09 loc5 [V09,T42] ( 3, 8.50) simd32 -> [rbp-70H] spill-single-def
+; V10 loc6 [V10,T43] ( 3, 8.50) simd32 -> [rbp-90H] spill-single-def
; V11 loc7 [V11,T13] ( 3, 5 ) byref -> [rbp-A8H] spill-single-def
-; V12 loc8 [V12,T21] ( 4, 14 ) simd32 -> mm4
+; V12 loc8 [V12,T19] ( 4, 14 ) simd32 -> mm4
; V13 loc9 [V13,T01] ( 5, 66 ) int -> [rbp-94H]
; V14 loc10 [V14,T07] ( 3, 32.50) byref -> [rbp-B0H] spill-single-def
-; V15 loc11 [V15,T22] ( 4, 14 ) simd16 -> mm2
+; V15 loc11 [V15,T20] ( 4, 14 ) simd16 -> mm2
; V16 loc12 [V16,T02] ( 5, 66 ) int -> [rbp-98H]
; V17 loc13 [V17,T08] ( 3, 32.50) byref -> [rbp-B8H] spill-single-def
;# V18 OutArgs [V18 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V19 tmp1 [V19 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
;* V20 tmp2 [V20 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V21 tmp3 [V21 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
;* V22 tmp4 [V22 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V23 tmp5 [V23,T23] ( 3, 12 ) simd32 -> mm4 "Inline stloc first use temp"
-; V24 tmp6 [V24,T24] ( 3, 12 ) simd32 -> mm5 "Inline stloc first use temp"
-; V25 tmp7 [V25,T25] ( 3, 12 ) simd32 -> mm6 "Inline stloc first use temp"
-; V26 tmp8 [V26,T26] ( 3, 12 ) simd32 -> mm4 "Inline stloc first use temp"
-; V27 tmp9 [V27,T42] ( 2, 8 ) simd32 -> mm5 "Inline stloc first use temp"
-; V28 tmp10 [V28,T43] ( 2, 8 ) simd32 -> mm4 "Inline stloc first use temp"
+; V23 tmp5 [V23,T21] ( 3, 12 ) simd32 -> mm4 "Inline stloc first use temp"
+; V24 tmp6 [V24,T22] ( 3, 12 ) simd32 -> mm5 "Inline stloc first use temp"
+; V25 tmp7 [V25,T23] ( 3, 12 ) simd32 -> mm6 "Inline stloc first use temp"
+; V26 tmp8 [V26,T24] ( 3, 12 ) simd32 -> mm4 "Inline stloc first use temp"
+; V27 tmp9 [V27,T44] ( 2, 8 ) simd32 -> mm5 "Inline stloc first use temp"
+; V28 tmp10 [V28,T45] ( 2, 8 ) simd32 -> mm4 "Inline stloc first use temp"
;* V29 tmp11 [V29 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V30 tmp12 [V30,T44] ( 2, 8 ) simd32 -> mm5 "Inline stloc first use temp"
-; V31 tmp13 [V31,T17] ( 4, 16 ) simd32 -> mm6 "Inline stloc first use temp"
+; V30 tmp12 [V30,T46] ( 2, 8 ) simd32 -> mm5 "Inline stloc first use temp"
+; V31 tmp13 [V31,T15] ( 4, 16 ) simd32 -> mm6 "Inline stloc first use temp"
;* V32 tmp14 [V32 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V33 tmp15 [V33 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V34 tmp16 [V34 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V35 tmp17 [V35 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V36 tmp18 [V36 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V37 tmp19 [V37,T45] ( 2, 8 ) simd32 -> mm6 "Inline stloc first use temp"
-; V38 tmp20 [V38,T18] ( 4, 16 ) simd32 -> mm4 "Inline stloc first use temp"
+; V37 tmp19 [V37,T47] ( 2, 8 ) simd32 -> mm6 "Inline stloc first use temp"
+; V38 tmp20 [V38,T16] ( 4, 16 ) simd32 -> mm4 "Inline stloc first use temp"
;* V39 tmp21 [V39 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V40 tmp22 [V40 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V41 tmp23 [V41 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V42 tmp24 [V42 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V43 tmp25 [V43 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
; V44 tmp26 [V44,T03] ( 2, 64 ) ushort -> rsi "Inlining Arg"
;* V45 tmp27 [V45 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
-; V46 tmp28 [V46,T27] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp"
-; V47 tmp29 [V47,T28] ( 3, 12 ) simd16 -> mm3 "Inline stloc first use temp"
+; V46 tmp28 [V46,T25] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp"
+; V47 tmp29 [V47,T26] ( 3, 12 ) simd16 -> mm3 "Inline stloc first use temp"
;* V48 tmp30 [V48 ] ( 0, 0 ) simd16 -> zero-ref
-; V49 tmp31 [V49,T29] ( 3, 12 ) simd16 -> mm4 "Inline stloc first use temp"
+; V49 tmp31 [V49,T27] ( 3, 12 ) simd16 -> mm4 "Inline stloc first use temp"
;* V50 tmp32 [V50 ] ( 0, 0 ) simd16 -> zero-ref
-; V51 tmp33 [V51,T30] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp"
-; V52 tmp34 [V52,T46] ( 2, 8 ) simd16 -> mm3 "Inline stloc first use temp"
-; V53 tmp35 [V53,T47] ( 2, 8 ) simd16 -> mm2 "Inline stloc first use temp"
-;* V54 tmp36 [V54 ] ( 0, 0 ) simd16 -> zero-ref
-;* V55 tmp37 [V55 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-; V56 tmp38 [V56,T48] ( 2, 8 ) simd16 -> mm3 "Inline stloc first use temp"
-; V57 tmp39 [V57,T19] ( 4, 16 ) simd16 -> mm4 "Inline stloc first use temp"
+; V51 tmp33 [V51,T28] ( 3, 12 ) simd16 -> mm2 "Inline stloc first use temp"
+; V52 tmp34 [V52,T48] ( 2, 8 ) simd16 -> mm3 "Inline stloc first use temp"
+; V53 tmp35 [V53,T49] ( 2, 8 ) simd16 -> mm2 "Inline stloc first use temp"
+;* V54 tmp36 [V54 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+; V55 tmp37 [V55,T50] ( 2, 8 ) simd16 -> mm3 "Inline stloc first use temp"
+; V56 tmp38 [V56,T17] ( 4, 16 ) simd16 -> mm4 "Inline stloc first use temp"
+;* V57 tmp39 [V57 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V58 tmp40 [V58 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V59 tmp41 [V59 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V60 tmp42 [V60 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V61 tmp43 [V61 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V61 tmp43 [V61 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V62 tmp44 [V62 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V63 tmp45 [V63 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V64 tmp46 [V64 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V65 tmp47 [V65 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
-;* V66 tmp48 [V66 ] ( 0, 0 ) simd16 -> zero-ref
-;* V67 tmp49 [V67 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-; V68 tmp50 [V68,T49] ( 2, 8 ) simd16 -> mm4 "Inline stloc first use temp"
-; V69 tmp51 [V69,T20] ( 4, 16 ) simd16 -> mm2 "Inline stloc first use temp"
+;* V64 tmp46 [V64 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
+;* V65 tmp47 [V65 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+; V66 tmp48 [V66,T51] ( 2, 8 ) simd16 -> mm4 "Inline stloc first use temp"
+; V67 tmp49 [V67,T18] ( 4, 16 ) simd16 -> mm2 "Inline stloc first use temp"
+;* V68 tmp50 [V68 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V69 tmp51 [V69 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V70 tmp52 [V70 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V71 tmp53 [V71 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V72 tmp54 [V72 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V73 tmp55 [V73 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V72 tmp54 [V72 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
+;* V73 tmp55 [V73 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V74 tmp56 [V74 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V75 tmp57 [V75 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V76 tmp58 [V76 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V77 tmp59 [V77 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
+;* V75 tmp57 [V75 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
+;* V76 tmp58 [V76 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
+; V77 tmp59 [V77,T04] ( 2, 64 ) ushort -> rsi "Inlining Arg"
;* V78 tmp60 [V78 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
-; V79 tmp61 [V79,T04] ( 2, 64 ) ushort -> rsi "Inlining Arg"
-;* V80 tmp62 [V80 ] ( 0, 0 ) struct (16) zero-ref ld-addr-op "Inlining Arg"
-; V81 tmp63 [V81,T05] ( 3, 33 ) byref -> rbx single-def "field V03._reference (fldOffset=0x0)" P-INDEP
-; V82 tmp64 [V82,T06] ( 3, 33 ) int -> r15 single-def "field V03._length (fldOffset=0x8)" P-INDEP
-;* V83 tmp65 [V83 ] ( 0, 0 ) byref -> zero-ref "field V43._reference (fldOffset=0x0)" P-INDEP
-;* V84 tmp66 [V84 ] ( 0, 0 ) int -> zero-ref "field V43._length (fldOffset=0x8)" P-INDEP
-;* V85 tmp67 [V85 ] ( 0, 0 ) byref -> zero-ref "field V45._reference (fldOffset=0x0)" P-INDEP
-;* V86 tmp68 [V86 ] ( 0, 0 ) int -> zero-ref "field V45._length (fldOffset=0x8)" P-INDEP
+; V79 tmp61 [V79,T05] ( 3, 33 ) byref -> rbx single-def "field V03._reference (fldOffset=0x0)" P-INDEP
+; V80 tmp62 [V80,T06] ( 3, 33 ) int -> r15 single-def "field V03._length (fldOffset=0x8)" P-INDEP
+;* V81 tmp63 [V81 ] ( 0, 0 ) byref -> zero-ref "field V43._reference (fldOffset=0x0)" P-INDEP
+;* V82 tmp64 [V82 ] ( 0, 0 ) int -> zero-ref "field V43._length (fldOffset=0x8)" P-INDEP
+;* V83 tmp65 [V83 ] ( 0, 0 ) byref -> zero-ref "field V45._reference (fldOffset=0x0)" P-INDEP
+;* V84 tmp66 [V84 ] ( 0, 0 ) int -> zero-ref "field V45._length (fldOffset=0x8)" P-INDEP
+;* V85 tmp67 [V85 ] ( 0, 0 ) byref -> zero-ref "field V76._reference (fldOffset=0x0)" P-INDEP
+;* V86 tmp68 [V86 ] ( 0, 0 ) int -> zero-ref "field V76._length (fldOffset=0x8)" P-INDEP
;* V87 tmp69 [V87 ] ( 0, 0 ) byref -> zero-ref "field V78._reference (fldOffset=0x0)" P-INDEP
;* V88 tmp70 [V88 ] ( 0, 0 ) int -> zero-ref "field V78._length (fldOffset=0x8)" P-INDEP
-;* V89 tmp71 [V89 ] ( 0, 0 ) byref -> zero-ref "field V80._reference (fldOffset=0x0)" P-INDEP
-;* V90 tmp72 [V90 ] ( 0, 0 ) int -> zero-ref "field V80._length (fldOffset=0x8)" P-INDEP
-; V91 cse0 [V91,T15] ( 5, 20 ) simd32 -> mm7 "CSE - moderate"
-; V92 cse1 [V92,T16] ( 5, 20 ) simd16 -> mm5 "CSE - moderate"
-; V93 cse2 [V93,T31] ( 3, 12 ) simd32 -> mm6 "CSE - moderate"
-; V94 cse3 [V94,T32] ( 3, 12 ) simd32 -> mm8 "CSE - moderate"
-; V95 cse4 [V95,T33] ( 3, 12 ) simd32 -> mm10 "CSE - moderate"
-; V96 cse5 [V96,T34] ( 3, 12 ) simd16 -> mm4 "CSE - moderate"
-; V97 cse6 [V97,T35] ( 3, 12 ) simd16 -> mm6 "CSE - moderate"
-; V98 cse7 [V98,T36] ( 3, 12 ) simd16 -> mm7 "CSE - moderate"
-; V99 cse8 [V99,T37] ( 3, 12 ) simd16 -> mm9 "CSE - moderate"
+; V89 cse0 [V89,T29] ( 3, 12 ) simd32 -> mm6 "CSE - moderate"
+; V90 cse1 [V90,T30] ( 3, 12 ) simd32 -> mm7 "CSE - moderate"
+; V91 cse2 [V91,T31] ( 3, 12 ) simd32 -> mm8 "CSE - moderate"
+; V92 cse3 [V92,T32] ( 3, 12 ) simd32 -> mm9 "CSE - moderate"
+; V93 cse4 [V93,T33] ( 3, 12 ) simd32 -> mm11 "CSE - moderate"
+; V94 cse5 [V94,T34] ( 3, 12 ) simd16 -> mm4 "CSE - moderate"
+; V95 cse6 [V95,T35] ( 3, 12 ) simd16 -> mm5 "CSE - moderate"
+; V96 cse7 [V96,T36] ( 3, 12 ) simd16 -> mm6 "CSE - moderate"
+; V97 cse8 [V97,T37] ( 3, 12 ) simd16 -> mm7 "CSE - moderate"
+; V98 cse9 [V98,T38] ( 3, 12 ) simd16 -> mm8 "CSE - moderate"
+; V99 cse10 [V99,T39] ( 3, 12 ) simd16 -> mm10 "CSE - moderate"
;
; Lcl frame size = 152
G_M48875_IG01:
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 152
vzeroupper
lea rbp, [rsp+C0H]
mov r14, rsi
mov rbx, rcx
mov r15d, r8d
;; size=37 bbWeight=1 PerfScore 8.50
G_M48875_IG02:
movsxd rsi, edx
lea r13, bword ptr [r14+2*rsi]
mov r12, r14
vmovups xmm0, xmmword ptr [rdi]
vmovaps xmmword ptr [rbp-40H], xmm0
vmovups xmm1, xmmword ptr [rdi+10H]
vmovaps xmmword ptr [rbp-50H], xmm1
cmp edx, 32
jl G_M48875_IG14
;; size=38 bbWeight=1 PerfScore 12.25
G_M48875_IG03:
vmovaps ymm2, ymm0
vinserti128 ymm2, ymm2, xmm0, 1
vmovups ymmword ptr [rbp-70H], ymm2
vmovaps ymm3, ymm1
vinserti128 ymm3, ymm3, xmm1, 1
vmovups ymmword ptr [rbp-90H], ymm3
lea rax, bword ptr [r13-40H]
mov bword ptr [rbp-A8H], rax
;; size=44 bbWeight=0.50 PerfScore 4.00
G_M48875_IG04:
vmovups ymm4, ymmword ptr [r12]
vmovups ymm5, ymmword ptr [r12+20H]
vmovups ymm6, ymmword ptr [reloc @RWD00]
vpand ymm7, ymm4, ymm6
vpand ymm6, ymm5, ymm6
vpackuswb ymm6, ymm7, ymm6
vpsrlw ymm4, ymm4, 8
vpsrlw ymm5, ymm5, 8
vpackuswb ymm4, ymm4, ymm5
vpsrld ymm5, ymm6, 5
vmovups ymm7, ymmword ptr [reloc @RWD32]
vpand ymm5, ymm5, ymm7
vmovups ymm8, ymmword ptr [reloc @RWD64]
vpshufb ymm5, ymm8, ymm5
vmovups ymm8, ymmword ptr [reloc @RWD96]
vpand ymm6, ymm6, ymm8
- vpcmpub k1, ymm6, ymm7, 6
- vpmovm2b ymm9, k1
- vmovups ymm10, ymmword ptr [reloc @RWD128]
- vpsubb ymm11, ymm6, ymm10
- vpshufb ymm11, ymm3, ymm11
+ vmovups ymm9, ymmword ptr [reloc @RWD128]
+ vpcmpub k1, ymm6, ymm9, 6
+ vpmovm2b ymm10, k1
+ vmovups ymm11, ymmword ptr [reloc @RWD160]
+ vpsubb ymm12, ymm6, ymm11
+ vpshufb ymm12, ymm3, ymm12
vpshufb ymm6, ymm2, ymm6
- vpternlogd ymm9, ymm11, ymm6, -54
- vpand ymm5, ymm9, ymm5
+ vpternlogd ymm10, ymm12, ymm6, -54
+ vpand ymm5, ymm10, ymm5
vxorps ymm6, ymm6, ymm6
vpcmpeqb ymm5, ymm5, ymm6
vpcmpeqd ymm6, ymm6, ymm6
vpxor ymm5, ymm5, ymm6
vpsrld ymm6, ymm4, 5
vpand ymm6, ymm6, ymm7
- vmovups ymm9, ymmword ptr [reloc @RWD64]
- vpshufb ymm6, ymm9, ymm6
+ vmovups ymm7, ymmword ptr [reloc @RWD64]
+ vpshufb ymm6, ymm7, ymm6
vpand ymm4, ymm4, ymm8
- vpcmpub k1, ymm4, ymm7, 6
+ vpcmpub k1, ymm4, ymm9, 6
vpmovm2b ymm7, k1
- vpsubb ymm8, ymm4, ymm10
+ vpsubb ymm8, ymm4, ymm11
vpshufb ymm8, ymm3, ymm8
vpshufb ymm4, ymm2, ymm4
vpternlogd ymm7, ymm8, ymm4, -54
vpand ymm4, ymm7, ymm6
vxorps ymm6, ymm6, ymm6
vpcmpeqb ymm4, ymm4, ymm6
vpcmpeqd ymm6, ymm6, ymm6
vpxor ymm4, ymm4, ymm6
vpand ymm4, ymm5, ymm4
vptest ymm4, ymm4
je SHORT G_M48875_IG07
- ;; size=246 bbWeight=4 PerfScore 312.00
+ ;; size=254 bbWeight=4 PerfScore 328.00
G_M48875_IG05:
vpermq ymm4, ymm4, -40
vpmovmskb ecx, ymm4
;; size=10 bbWeight=2 PerfScore 10.00
G_M48875_IG06:
mov dword ptr [rbp-94H], ecx
xor esi, esi
tzcnt esi, ecx
lea r8, bword ptr [r12+2*rsi]
mov bword ptr [rbp-B0H], r8
movzx rsi, word ptr [r8]
movsx rsi, si
mov rdi, rbx
mov edx, r15d
mov r9, 0xD1FFAB1E ; code for System.SpanHelpers:NonPackedContainsValueType[short](byref,short,int):bool
call [r9]System.SpanHelpers:NonPackedContainsValueType[short](byref,short,int):bool
test eax, eax
jne SHORT G_M48875_IG11
blsr ecx, dword ptr [rbp-94H]
mov eax, ecx
test eax, eax
mov ecx, eax
jne SHORT G_M48875_IG06
;; size=71 bbWeight=16 PerfScore 252.00
G_M48875_IG07:
add r12, 64
mov rax, bword ptr [rbp-A8H]
cmp r12, rax
vmovups ymm2, ymmword ptr [rbp-70H]
vmovups ymm3, ymmword ptr [rbp-90H]
jbe G_M48875_IG04
;; size=33 bbWeight=4 PerfScore 42.00
G_M48875_IG08:
cmp r12, r13
je SHORT G_M48875_IG09
mov rsi, r13
sub rsi, r12
cmp rsi, 32
jle SHORT G_M48875_IG13
mov rax, bword ptr [rbp-A8H]
mov r12, rax
jmp G_M48875_IG04
;; size=32 bbWeight=0.50 PerfScore 3.12
G_M48875_IG09:
mov eax, -1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M48875_IG10:
vzeroupper
add rsp, 152
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=21 bbWeight=0.50 PerfScore 2.62
G_M48875_IG11:
mov r13, bword ptr [rbp-B0H]
sub r13, r14
mov rax, r13
shr rax, 1
;; size=16 bbWeight=0.50 PerfScore 1.00
G_M48875_IG12:
vzeroupper
add rsp, 152
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=21 bbWeight=0.50 PerfScore 2.62
G_M48875_IG13:
lea r12, bword ptr [r13-20H]
vmovaps xmm0, xmmword ptr [rbp-40H]
vmovaps xmm1, xmmword ptr [rbp-50H]
;; size=14 bbWeight=0.50 PerfScore 3.25
G_M48875_IG14:
lea rax, bword ptr [r13-20H]
mov bword ptr [rbp-A0H], rax
;; size=11 bbWeight=0.50 PerfScore 0.75
G_M48875_IG15:
vmovups xmm2, xmmword ptr [r12]
vmovups xmm3, xmmword ptr [r12+10H]
vmovups xmm4, xmmword ptr [reloc @RWD00]
vpand xmm5, xmm2, xmm4
vpand xmm4, xmm3, xmm4
vpackuswb xmm4, xmm5, xmm4
vpsrlw xmm2, xmm2, 8
vpsrlw xmm3, xmm3, 8
vpackuswb xmm2, xmm2, xmm3
vpsrld xmm3, xmm4, 5
vmovups xmm5, xmmword ptr [reloc @RWD32]
vpand xmm3, xmm3, xmm5
vmovups xmm6, xmmword ptr [reloc @RWD64]
vpshufb xmm3, xmm6, xmm3
vmovups xmm7, xmmword ptr [reloc @RWD96]
vpand xmm4, xmm4, xmm7
- vpcmpub k1, xmm4, xmm5, 6
- vpmovm2b xmm8, k1
- vmovups xmm9, xmmword ptr [reloc @RWD128]
- vpsubb xmm10, xmm4, xmm9
- vpshufb xmm10, xmm1, xmm10
+ vmovups xmm8, xmmword ptr [reloc @RWD128]
+ vpcmpub k1, xmm4, xmm8, 6
+ vpmovm2b xmm9, k1
+ vmovups xmm10, xmmword ptr [reloc @RWD160]
+ vpsubb xmm11, xmm4, xmm10
+ vpshufb xmm11, xmm1, xmm11
vpshufb xmm4, xmm0, xmm4
- vpternlogd xmm8, xmm10, xmm4, -54
- vpand xmm3, xmm8, xmm3
+ vpternlogd xmm9, xmm11, xmm4, -54
+ vpand xmm3, xmm9, xmm3
vxorps xmm4, xmm4, xmm4
vpcmpeqb xmm3, xmm3, xmm4
vpcmpeqd xmm4, xmm4, xmm4
vpxor xmm3, xmm3, xmm4
vpsrld xmm4, xmm2, 5
vpand xmm4, xmm4, xmm5
vpshufb xmm4, xmm6, xmm4
vpand xmm2, xmm2, xmm7
- vpcmpub k1, xmm2, xmm5, 6
+ vpcmpub k1, xmm2, xmm8, 6
vpmovm2b xmm5, k1
- vpsubb xmm6, xmm2, xmm9
+ vpsubb xmm6, xmm2, xmm10
vpshufb xmm6, xmm1, xmm6
vpshufb xmm2, xmm0, xmm2
vpternlogd xmm5, xmm6, xmm2, -54
vpand xmm2, xmm5, xmm4
vxorps xmm4, xmm4, xmm4
vpcmpeqb xmm2, xmm2, xmm4
vpcmpeqd xmm4, xmm4, xmm4
vpxor xmm2, xmm2, xmm4
vpand xmm2, xmm3, xmm2
vptest xmm2, xmm2
je SHORT G_M48875_IG18
- ;; size=236 bbWeight=4 PerfScore 228.00
+ ;; size=244 bbWeight=4 PerfScore 240.00
G_M48875_IG16:
vpmovmskb ecx, xmm2
;; size=4 bbWeight=2 PerfScore 4.00
G_M48875_IG17:
mov dword ptr [rbp-98H], ecx
xor esi, esi
tzcnt esi, ecx
lea r8, bword ptr [r12+2*rsi]
mov bword ptr [rbp-B8H], r8
movzx rsi, word ptr [r8]
movsx rsi, si
mov rdi, rbx
mov edx, r15d
mov r9, 0xD1FFAB1E ; code for System.SpanHelpers:NonPackedContainsValueType[short](byref,short,int):bool
call [r9]System.SpanHelpers:NonPackedContainsValueType[short](byref,short,int):bool
test eax, eax
jne SHORT G_M48875_IG20
blsr ecx, dword ptr [rbp-98H]
mov eax, ecx
test eax, eax
mov ecx, eax
jne SHORT G_M48875_IG17
;; size=71 bbWeight=16 PerfScore 252.00
G_M48875_IG18:
add r12, 32
mov rax, bword ptr [rbp-A0H]
cmp r12, rax
vmovaps xmm0, xmmword ptr [rbp-40H]
vmovaps xmm1, xmmword ptr [rbp-50H]
jbe G_M48875_IG15
;; size=30 bbWeight=4 PerfScore 34.00
G_M48875_IG19:
cmp r12, r13
je SHORT G_M48875_IG22
mov rax, bword ptr [rbp-A0H]
mov r12, rax
jmp G_M48875_IG15
;; size=20 bbWeight=0.50 PerfScore 2.25
G_M48875_IG20:
mov rbx, bword ptr [rbp-B8H]
sub rbx, r14
mov rax, rbx
shr rax, 1
;; size=16 bbWeight=0.50 PerfScore 1.00
G_M48875_IG21:
vzeroupper
add rsp, 152
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=21 bbWeight=0.50 PerfScore 2.62
G_M48875_IG22:
mov eax, -1
;; size=5 bbWeight=0.50 PerfScore 0.12
G_M48875_IG23:
vzeroupper
add rsp, 152
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
;; size=21 bbWeight=0.50 PerfScore 2.62
RWD00 dq 00FF00FF00FF00FFh, 00FF00FF00FF00FFh, 00FF00FF00FF00FFh, 00FF00FF00FF00FFh
-RWD32 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD32 dq 0707070707070707h, 0707070707070707h, 0707070707070707h, 0707070707070707h
RWD64 dq 8040201008040201h, 8040201008040201h, 8040201008040201h, 8040201008040201h
RWD96 dq 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh
-RWD128 dq 1010101010101010h, 1010101010101010h, 1010101010101010h, 1010101010101010h
+RWD128 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD160 dq 1010101010101010h, 1010101010101010h, 1010101010101010h, 1010101010101010h
-; Total bytes of code 1023, prolog size 37, PerfScore 1283.18, instruction count 238, allocated bytes for code 1023 (MethodHash=36e94114) for method System.Buffers.ProbabilisticMap:IndexOfAnyVectorized(byref,byref,int,System.ReadOnlySpan`1[ushort]):int (FullOpts)
+; Total bytes of code 1039, prolog size 37, PerfScore 1312.78, instruction count 240, allocated bytes for code 1039 (MethodHash=36e94114) for method System.Buffers.ProbabilisticMap:IndexOfAnyVectorized(byref,byref,int,System.ReadOnlySpan`1[ushort]):int (FullOpts)
8 (3.19 % of base) - System.Buffers.ProbabilisticMap:ContainsMask16Chars(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],byref):System.Runtime.Intrinsics.Vector128`1[ubyte]
; Assembly listing for method System.Buffers.ProbabilisticMap:ContainsMask16Chars(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],byref):System.Runtime.Intrinsics.Vector128`1[ubyte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 2 single block inlinees; 8 inlinees without PGO data
; Final local variable assignments
;
; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T13] ( 2, 2 ) simd16 -> mm0 single-def
-; V02 arg1 [V02,T14] ( 2, 2 ) simd16 -> mm1 single-def
+; V01 arg0 [V01,T14] ( 2, 2 ) simd16 -> mm0 single-def
+; V02 arg1 [V02,T15] ( 2, 2 ) simd16 -> mm1 single-def
; V03 arg2 [V03,T01] ( 4, 4 ) byref -> rsi single-def
-; V04 loc0 [V04,T05] ( 3, 3 ) simd16 -> mm2
-; V05 loc1 [V05,T06] ( 3, 3 ) simd16 -> mm3
-; V06 loc2 [V06,T07] ( 3, 3 ) simd16 -> mm4
-; V07 loc3 [V07,T08] ( 3, 3 ) simd16 -> mm2
-; V08 loc4 [V08,T15] ( 2, 2 ) simd16 -> mm3
-; V09 loc5 [V09,T16] ( 2, 2 ) simd16 -> mm0
+; V04 loc0 [V04,T04] ( 3, 3 ) simd16 -> mm2
+; V05 loc1 [V05,T05] ( 3, 3 ) simd16 -> mm3
+; V06 loc2 [V06,T06] ( 3, 3 ) simd16 -> mm4
+; V07 loc3 [V07,T07] ( 3, 3 ) simd16 -> mm2
+; V08 loc4 [V08,T16] ( 2, 2 ) simd16 -> mm3
+; V09 loc5 [V09,T17] ( 2, 2 ) simd16 -> mm0
;# V10 OutArgs [V10 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V11 tmp1 [V11 ] ( 0, 0 ) simd16 -> zero-ref
;* V12 tmp2 [V12 ] ( 0, 0 ) simd16 -> zero-ref
-;* V13 tmp3 [V13 ] ( 0, 0 ) simd16 -> zero-ref
-;* V14 tmp4 [V14 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-; V15 tmp5 [V15,T17] ( 2, 2 ) simd16 -> mm3 "Inline stloc first use temp"
-; V16 tmp6 [V16,T03] ( 4, 4 ) simd16 -> mm4 "Inline stloc first use temp"
+;* V13 tmp3 [V13 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+; V14 tmp4 [V14,T18] ( 2, 2 ) simd16 -> mm3 "Inline stloc first use temp"
+; V15 tmp5 [V15,T02] ( 4, 4 ) simd16 -> mm4 "Inline stloc first use temp"
+;* V16 tmp6 [V16 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V17 tmp7 [V17 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V18 tmp8 [V18 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V19 tmp9 [V19 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V20 tmp10 [V20 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V20 tmp10 [V20 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V21 tmp11 [V21 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V22 tmp12 [V22 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V23 tmp13 [V23 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V24 tmp14 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
-;* V25 tmp15 [V25 ] ( 0, 0 ) simd16 -> zero-ref
-;* V26 tmp16 [V26 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-; V27 tmp17 [V27,T18] ( 2, 2 ) simd16 -> mm4 "Inline stloc first use temp"
-; V28 tmp18 [V28,T04] ( 4, 4 ) simd16 -> mm2 "Inline stloc first use temp"
+;* V23 tmp13 [V23 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
+;* V24 tmp14 [V24 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+; V25 tmp15 [V25,T19] ( 2, 2 ) simd16 -> mm4 "Inline stloc first use temp"
+; V26 tmp16 [V26,T03] ( 4, 4 ) simd16 -> mm2 "Inline stloc first use temp"
+;* V27 tmp17 [V27 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V28 tmp18 [V28 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V29 tmp19 [V29 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
;* V30 tmp20 [V30 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V31 tmp21 [V31 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
-;* V32 tmp22 [V32 ] ( 0, 0 ) simd16 -> zero-ref "Inline stloc first use temp"
+;* V31 tmp21 [V31 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
+;* V32 tmp22 [V32 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
;* V33 tmp23 [V33 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V34 tmp24 [V34 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V35 tmp25 [V35 ] ( 0, 0 ) simd16 -> zero-ref "Inline return value spill temp"
-;* V36 tmp26 [V36 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
-; V37 cse0 [V37,T02] ( 5, 5 ) simd16 -> mm5 "CSE - aggressive"
-; V38 cse1 [V38,T09] ( 3, 3 ) simd16 -> mm4 "CSE - moderate"
-; V39 cse2 [V39,T10] ( 3, 3 ) simd16 -> mm6 "CSE - moderate"
-; V40 cse3 [V40,T11] ( 3, 3 ) simd16 -> mm7 "CSE - moderate"
-; V41 cse4 [V41,T12] ( 3, 3 ) simd16 -> mm9 "CSE - moderate"
+;* V34 tmp24 [V34 ] ( 0, 0 ) simd16 -> zero-ref "Inlining Arg"
+; V35 cse0 [V35,T08] ( 3, 3 ) simd16 -> mm4 "CSE - moderate"
+; V36 cse1 [V36,T09] ( 3, 3 ) simd16 -> mm5 "CSE - moderate"
+; V37 cse2 [V37,T10] ( 3, 3 ) simd16 -> mm6 "CSE - moderate"
+; V38 cse3 [V38,T11] ( 3, 3 ) simd16 -> mm7 "CSE - moderate"
+; V39 cse4 [V39,T12] ( 3, 3 ) simd16 -> mm8 "CSE - moderate"
+; V40 cse5 [V40,T13] ( 3, 3 ) simd16 -> mm10 "CSE - moderate"
;
; Lcl frame size = 0
G_M35004_IG01:
push rbp
vzeroupper
mov rbp, rsp
vmovups xmm0, xmmword ptr [rbp+10H]
vmovups xmm1, xmmword ptr [rbp+20H]
;; size=17 bbWeight=1 PerfScore 8.25
G_M35004_IG02:
vmovups xmm2, xmmword ptr [rsi]
vmovups xmm3, xmmword ptr [rsi+10H]
vmovups xmm4, xmmword ptr [reloc @RWD00]
vpand xmm5, xmm2, xmm4
vpand xmm4, xmm3, xmm4
vpackuswb xmm4, xmm5, xmm4
vpsrlw xmm2, xmm2, 8
vpsrlw xmm3, xmm3, 8
vpackuswb xmm2, xmm2, xmm3
vpsrld xmm3, xmm4, 5
vmovups xmm5, xmmword ptr [reloc @RWD16]
vpand xmm3, xmm3, xmm5
vmovups xmm6, xmmword ptr [reloc @RWD32]
vpshufb xmm3, xmm6, xmm3
vmovups xmm7, xmmword ptr [reloc @RWD48]
vpand xmm4, xmm4, xmm7
- vpcmpub k1, xmm4, xmm5, 6
- vpmovm2b xmm8, k1
- vmovups xmm9, xmmword ptr [reloc @RWD64]
- vpsubb xmm10, xmm4, xmm9
- vpshufb xmm10, xmm1, xmm10
+ vmovups xmm8, xmmword ptr [reloc @RWD64]
+ vpcmpub k1, xmm4, xmm8, 6
+ vpmovm2b xmm9, k1
+ vmovups xmm10, xmmword ptr [reloc @RWD80]
+ vpsubb xmm11, xmm4, xmm10
+ vpshufb xmm11, xmm1, xmm11
vpshufb xmm4, xmm0, xmm4
- vpternlogd xmm8, xmm10, xmm4, -54
- vpand xmm3, xmm8, xmm3
+ vpternlogd xmm9, xmm11, xmm4, -54
+ vpand xmm3, xmm9, xmm3
vxorps xmm4, xmm4, xmm4
vpcmpeqb xmm3, xmm3, xmm4
vpcmpeqd xmm4, xmm4, xmm4
vpxor xmm3, xmm3, xmm4
vpsrld xmm4, xmm2, 5
vpand xmm4, xmm4, xmm5
vpshufb xmm4, xmm6, xmm4
vpand xmm2, xmm2, xmm7
- vpcmpub k1, xmm2, xmm5, 6
+ vpcmpub k1, xmm2, xmm8, 6
vpmovm2b xmm5, k1
- vpsubb xmm6, xmm2, xmm9
+ vpsubb xmm6, xmm2, xmm10
vpshufb xmm1, xmm1, xmm6
vpshufb xmm0, xmm0, xmm2
vpternlogd xmm5, xmm1, xmm0, -54
vpand xmm0, xmm5, xmm4
vxorps xmm1, xmm1, xmm1
vpcmpeqb xmm0, xmm0, xmm1
vpcmpeqd xmm1, xmm1, xmm1
vpxor xmm0, xmm0, xmm1
vpand xmm0, xmm3, xmm0
vmovups xmmword ptr [rdi], xmm0
mov rax, rdi
- ;; size=232 bbWeight=1 PerfScore 55.25
+ ;; size=240 bbWeight=1 PerfScore 58.25
G_M35004_IG03:
pop rbp
ret
;; size=2 bbWeight=1 PerfScore 1.50
RWD00 dq 00FF00FF00FF00FFh, 00FF00FF00FF00FFh
-RWD16 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD16 dq 0707070707070707h, 0707070707070707h
RWD32 dq 8040201008040201h, 8040201008040201h
RWD48 dq 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh
-RWD64 dq 1010101010101010h, 1010101010101010h
+RWD64 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD80 dq 1010101010101010h, 1010101010101010h
-; Total bytes of code 251, prolog size 7, PerfScore 90.10, instruction count 53, allocated bytes for code 251 (MethodHash=a0077743) for method System.Buffers.ProbabilisticMap:ContainsMask16Chars(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],byref):System.Runtime.Intrinsics.Vector128`1[ubyte] (FullOpts)
+; Total bytes of code 259, prolog size 7, PerfScore 93.90, instruction count 54, allocated bytes for code 259 (MethodHash=a0077743) for method System.Buffers.ProbabilisticMap:ContainsMask16Chars(System.Runtime.Intrinsics.Vector128`1[ubyte],System.Runtime.Intrinsics.Vector128`1[ubyte],byref):System.Runtime.Intrinsics.Vector128`1[ubyte] (FullOpts)
8 (3.05 % of base) - System.Buffers.ProbabilisticMap:ContainsMask32CharsAvx2(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],byref):System.Runtime.Intrinsics.Vector256`1[ubyte]
; Assembly listing for method System.Buffers.ProbabilisticMap:ContainsMask32CharsAvx2(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],byref):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
; rbp based frame
; partially interruptible
; No PGO data
; 0 inlinees with PGO data; 4 single block inlinees; 0 inlinees without PGO data
; Final local variable assignments
;
; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T12] ( 2, 2 ) simd32 -> mm0 single-def
-; V02 arg1 [V02,T13] ( 2, 2 ) simd32 -> mm1 single-def
+; V01 arg0 [V01,T13] ( 2, 2 ) simd32 -> mm0 single-def
+; V02 arg1 [V02,T14] ( 2, 2 ) simd32 -> mm1 single-def
; V03 arg2 [V03,T01] ( 4, 4 ) byref -> rsi single-def
-; V04 loc0 [V04,T05] ( 3, 3 ) simd32 -> mm2
-; V05 loc1 [V05,T06] ( 3, 3 ) simd32 -> mm3
-; V06 loc2 [V06,T07] ( 3, 3 ) simd32 -> mm4
-; V07 loc3 [V07,T08] ( 3, 3 ) simd32 -> mm2
-; V08 loc4 [V08,T14] ( 2, 2 ) simd32 -> mm3
-; V09 loc5 [V09,T15] ( 2, 2 ) simd32 -> mm0
+; V04 loc0 [V04,T04] ( 3, 3 ) simd32 -> mm2
+; V05 loc1 [V05,T05] ( 3, 3 ) simd32 -> mm3
+; V06 loc2 [V06,T06] ( 3, 3 ) simd32 -> mm4
+; V07 loc3 [V07,T07] ( 3, 3 ) simd32 -> mm2
+; V08 loc4 [V08,T15] ( 2, 2 ) simd32 -> mm3
+; V09 loc5 [V09,T16] ( 2, 2 ) simd32 -> mm0
;# V10 OutArgs [V10 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
;* V11 tmp1 [V11 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V12 tmp2 [V12,T16] ( 2, 2 ) simd32 -> mm3 "Inline stloc first use temp"
-; V13 tmp3 [V13,T03] ( 4, 4 ) simd32 -> mm4 "Inline stloc first use temp"
+; V12 tmp2 [V12,T17] ( 2, 2 ) simd32 -> mm3 "Inline stloc first use temp"
+; V13 tmp3 [V13,T02] ( 4, 4 ) simd32 -> mm4 "Inline stloc first use temp"
;* V14 tmp4 [V14 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V15 tmp5 [V15 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V16 tmp6 [V16 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V17 tmp7 [V17 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V18 tmp8 [V18 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V19 tmp9 [V19,T17] ( 2, 2 ) simd32 -> mm4 "Inline stloc first use temp"
-; V20 tmp10 [V20,T04] ( 4, 4 ) simd32 -> mm2 "Inline stloc first use temp"
+; V19 tmp9 [V19,T18] ( 2, 2 ) simd32 -> mm4 "Inline stloc first use temp"
+; V20 tmp10 [V20,T03] ( 4, 4 ) simd32 -> mm2 "Inline stloc first use temp"
;* V21 tmp11 [V21 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V22 tmp12 [V22 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V23 tmp13 [V23 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
;* V24 tmp14 [V24 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V25 cse0 [V25,T02] ( 5, 5 ) simd32 -> mm5 "CSE - aggressive"
-; V26 cse1 [V26,T09] ( 3, 3 ) simd32 -> mm4 "CSE - moderate"
+; V25 cse0 [V25,T08] ( 3, 3 ) simd32 -> mm4 "CSE - moderate"
+; V26 cse1 [V26,T09] ( 3, 3 ) simd32 -> mm5 "CSE - moderate"
; V27 cse2 [V27,T10] ( 3, 3 ) simd32 -> mm6 "CSE - moderate"
-; V28 cse3 [V28,T11] ( 3, 3 ) simd32 -> mm8 "CSE - moderate"
+; V28 cse3 [V28,T11] ( 3, 3 ) simd32 -> mm7 "CSE - moderate"
+; V29 cse4 [V29,T12] ( 3, 3 ) simd32 -> mm9 "CSE - moderate"
;
; Lcl frame size = 0
G_M59405_IG01:
push rbp
vzeroupper
mov rbp, rsp
vmovups ymm0, ymmword ptr [rbp+10H]
vmovups ymm1, ymmword ptr [rbp+30H]
;; size=17 bbWeight=1 PerfScore 10.25
G_M59405_IG02:
vmovups ymm2, ymmword ptr [rsi]
vmovups ymm3, ymmword ptr [rsi+20H]
vmovups ymm4, ymmword ptr [reloc @RWD00]
vpand ymm5, ymm2, ymm4
vpand ymm4, ymm3, ymm4
vpackuswb ymm4, ymm5, ymm4
vpsrlw ymm2, ymm2, 8
vpsrlw ymm3, ymm3, 8
vpackuswb ymm2, ymm2, ymm3
vpsrld ymm3, ymm4, 5
vmovups ymm5, ymmword ptr [reloc @RWD32]
vpand ymm3, ymm3, ymm5
vmovups ymm6, ymmword ptr [reloc @RWD64]
vpshufb ymm3, ymm6, ymm3
vmovups ymm6, ymmword ptr [reloc @RWD96]
vpand ymm4, ymm4, ymm6
- vpcmpub k1, ymm4, ymm5, 6
- vpmovm2b ymm7, k1
- vmovups ymm8, ymmword ptr [reloc @RWD128]
- vpsubb ymm9, ymm4, ymm8
- vpshufb ymm9, ymm1, ymm9
+ vmovups ymm7, ymmword ptr [reloc @RWD128]
+ vpcmpub k1, ymm4, ymm7, 6
+ vpmovm2b ymm8, k1
+ vmovups ymm9, ymmword ptr [reloc @RWD160]
+ vpsubb ymm10, ymm4, ymm9
+ vpshufb ymm10, ymm1, ymm10
vpshufb ymm4, ymm0, ymm4
- vpternlogd ymm7, ymm9, ymm4, -54
- vpand ymm3, ymm7, ymm3
+ vpternlogd ymm8, ymm10, ymm4, -54
+ vpand ymm3, ymm8, ymm3
vxorps ymm4, ymm4, ymm4
vpcmpeqb ymm3, ymm3, ymm4
vpcmpeqd ymm4, ymm4, ymm4
vpxor ymm3, ymm3, ymm4
vpsrld ymm4, ymm2, 5
vpand ymm4, ymm4, ymm5
- vmovups ymm7, ymmword ptr [reloc @RWD64]
- vpshufb ymm4, ymm7, ymm4
+ vmovups ymm5, ymmword ptr [reloc @RWD64]
+ vpshufb ymm4, ymm5, ymm4
vpand ymm2, ymm2, ymm6
- vpcmpub k1, ymm2, ymm5, 6
+ vpcmpub k1, ymm2, ymm7, 6
vpmovm2b ymm5, k1
- vpsubb ymm6, ymm2, ymm8
+ vpsubb ymm6, ymm2, ymm9
vpshufb ymm1, ymm1, ymm6
vpshufb ymm0, ymm0, ymm2
vpternlogd ymm5, ymm1, ymm0, -54
vpand ymm0, ymm5, ymm4
vxorps ymm1, ymm1, ymm1
vpcmpeqb ymm0, ymm0, ymm1
vpcmpeqd ymm1, ymm1, ymm1
vpxor ymm0, ymm0, ymm1
vpand ymm0, ymm3, ymm0
vmovups ymmword ptr [rdi], ymm0
mov rax, rdi
- ;; size=240 bbWeight=1 PerfScore 74.25
+ ;; size=248 bbWeight=1 PerfScore 78.25
G_M59405_IG03:
vzeroupper
pop rbp
ret
;; size=5 bbWeight=1 PerfScore 2.50
RWD00 dq 00FF00FF00FF00FFh, 00FF00FF00FF00FFh, 00FF00FF00FF00FFh, 00FF00FF00FF00FFh
-RWD32 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD32 dq 0707070707070707h, 0707070707070707h, 0707070707070707h, 0707070707070707h
RWD64 dq 8040201008040201h, 8040201008040201h, 8040201008040201h, 8040201008040201h
RWD96 dq 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh, 1F1F1F1F1F1F1F1Fh
-RWD128 dq 1010101010101010h, 1010101010101010h, 1010101010101010h, 1010101010101010h
+RWD128 dq 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh, 0F0F0F0F0F0F0F0Fh
+RWD160 dq 1010101010101010h, 1010101010101010h, 1010101010101010h, 1010101010101010h
-; Total bytes of code 262, prolog size 7, PerfScore 113.20, instruction count 55, allocated bytes for code 262 (MethodHash=e39717f2) for method System.Buffers.ProbabilisticMap:ContainsMask32CharsAvx2(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],byref):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
+; Total bytes of code 270, prolog size 7, PerfScore 118.00, instruction count 56, allocated bytes for code 270 (MethodHash=e39717f2) for method System.Buffers.ProbabilisticMap:ContainsMask32CharsAvx2(System.Runtime.Intrinsics.Vector256`1[ubyte],System.Runtime.Intrinsics.Vector256`1[ubyte],byref):System.Runtime.Intrinsics.Vector256`1[ubyte] (FullOpts)
Top method improvements

-629 (-85.69 % of base) - System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte]

; Assembly listing for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
-; rbp based frame
-; fully interruptible
+; rsp based frame
+; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 48 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T17] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T42] ( 2, 2 ) simd64 -> [rbp+10H] do-not-enreg[SF] single-def
-; V02 arg1 [V02,T18] ( 3, 3 ) int -> rsi single-def
+; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
+; V01 arg0 [V01,T09] ( 2, 2 ) simd64 -> [rsp+80H] do-not-enreg[SF] single-def
+; V02 arg1 [V02,T02] ( 3, 3 ) int -> rsi single-def
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V04 tmp1 [V04,T33] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
-; V05 tmp2 [V05,T34] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
-; V06 tmp3 [V06,T27] ( 3, 6 ) simd32 -> [rbp-30H] do-not-enreg[SF] "Inlining Arg"
-; V07 tmp4 [V07,T35] ( 2, 4 ) simd16 -> mm0 "impAppendStmt"
-;* V08 tmp5 [V08 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V09 tmp6 [V09,T28] ( 3, 6 ) simd16 -> [rbp-40H] do-not-enreg[SF] "Inlining Arg"
-;* V10 tmp7 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V11 tmp8 [V11 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V12 tmp9 [V12 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V13 tmp10 [V13 ] ( 2, 5 ) struct ( 8) [rbp-48H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V14 tmp11 [V14,T01] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V15 tmp12 [V15 ] ( 2, 10 ) struct ( 8) [rbp-50H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V16 tmp13 [V16 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V17 tmp14 [V17 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V18 tmp15 [V18 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V19 tmp16 [V19,T09] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V20 tmp17 [V20 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V21 tmp18 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V22 tmp19 [V22 ] ( 2, 5 ) struct ( 8) [rbp-58H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V23 tmp20 [V23,T02] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V24 tmp21 [V24 ] ( 2, 10 ) struct ( 8) [rbp-60H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V25 tmp22 [V25 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V26 tmp23 [V26 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V27 tmp24 [V27 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V28 tmp25 [V28,T10] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V29 tmp26 [V29 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V30 tmp27 [V30,T37] ( 3, 3 ) simd16 -> [rbp-70H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V31 tmp28 [V31,T29] ( 3, 6 ) simd16 -> [rbp-80H] do-not-enreg[SF] "Inlining Arg"
-;* V32 tmp29 [V32 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V33 tmp30 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V34 tmp31 [V34 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V35 tmp32 [V35 ] ( 2, 5 ) struct ( 8) [rbp-88H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V36 tmp33 [V36,T03] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V37 tmp34 [V37 ] ( 2, 10 ) struct ( 8) [rbp-90H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V38 tmp35 [V38 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V39 tmp36 [V39 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V40 tmp37 [V40 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V41 tmp38 [V41,T11] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V42 tmp39 [V42 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V43 tmp40 [V43 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V44 tmp41 [V44 ] ( 2, 5 ) struct ( 8) [rbp-98H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V45 tmp42 [V45,T04] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V46 tmp43 [V46 ] ( 2, 10 ) struct ( 8) [rbp-A0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V47 tmp44 [V47 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V48 tmp45 [V48 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V49 tmp46 [V49 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V50 tmp47 [V50,T12] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V51 tmp48 [V51 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V52 tmp49 [V52,T38] ( 3, 3 ) simd16 -> [rbp-B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V53 tmp50 [V53 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V54 tmp51 [V54 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V55 tmp52 [V55,T30] ( 3, 6 ) simd32 -> [rbp-D0H] do-not-enreg[SF] "Inlining Arg"
-; V56 tmp53 [V56,T36] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
-;* V57 tmp54 [V57 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V58 tmp55 [V58,T31] ( 3, 6 ) simd16 -> [rbp-E0H] do-not-enreg[SF] "Inlining Arg"
-;* V59 tmp56 [V59 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V60 tmp57 [V60 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V61 tmp58 [V61 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V62 tmp59 [V62 ] ( 2, 5 ) struct ( 8) [rbp-E8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V63 tmp60 [V63,T05] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V64 tmp61 [V64 ] ( 2, 10 ) struct ( 8) [rbp-F0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V65 tmp62 [V65 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V66 tmp63 [V66 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V67 tmp64 [V67 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V68 tmp65 [V68,T13] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V69 tmp66 [V69 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V70 tmp67 [V70 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V71 tmp68 [V71 ] ( 2, 5 ) struct ( 8) [rbp-F8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V72 tmp69 [V72,T06] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V73 tmp70 [V73 ] ( 2, 10 ) struct ( 8) [rbp-100H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V74 tmp71 [V74 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V75 tmp72 [V75 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V76 tmp73 [V76 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V77 tmp74 [V77,T14] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V78 tmp75 [V78 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V79 tmp76 [V79,T39] ( 3, 3 ) simd16 -> [rbp-110H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V80 tmp77 [V80,T32] ( 3, 6 ) simd16 -> [rbp-120H] do-not-enreg[SF] "Inlining Arg"
-;* V81 tmp78 [V81 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V82 tmp79 [V82 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V83 tmp80 [V83 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V84 tmp81 [V84 ] ( 2, 5 ) struct ( 8) [rbp-128H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V85 tmp82 [V85,T07] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V86 tmp83 [V86 ] ( 2, 10 ) struct ( 8) [rbp-130H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V87 tmp84 [V87 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V88 tmp85 [V88 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V89 tmp86 [V89 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V90 tmp87 [V90,T15] ( 2, 8 ) byte -> rcx "Inline return value spill temp"
-;* V91 tmp88 [V91 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V92 tmp89 [V92 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V93 tmp90 [V93 ] ( 2, 5 ) struct ( 8) [rbp-138H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V94 tmp91 [V94,T08] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V95 tmp92 [V95 ] ( 2, 10 ) struct ( 8) [rbp-140H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V96 tmp93 [V96 ] ( 0, 0 ) byte -> zero-ref "Inline stloc first use temp"
-;* V97 tmp94 [V97 ] ( 0, 0 ) byte -> zero-ref "Inline return value spill temp"
-;* V98 tmp95 [V98 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V99 tmp96 [V99,T16] ( 2, 8 ) byte -> rdx "Inline return value spill temp"
-;* V100 tmp97 [V100 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V101 tmp98 [V101,T40] ( 3, 3 ) simd16 -> [rbp-150H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V102 tmp99 [V102 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V103 tmp100 [V103 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V104 tmp101 [V104,T41] ( 3, 3 ) simd64 -> [rbp-1B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V105 tmp102 [V105,T19] ( 2, 2 ) long -> rax "field V10._00 (fldOffset=0x0)" P-INDEP
-; V106 tmp103 [V106,T20] ( 2, 2 ) long -> rcx "field V11._00 (fldOffset=0x0)" P-INDEP
-;* V107 tmp104 [V107 ] ( 0, 0 ) long -> zero-ref "field V12._00 (fldOffset=0x0)" P-INDEP
-; V108 tmp105 [V108 ] ( 2, 5 ) long -> [rbp-48H] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-; V109 tmp106 [V109 ] ( 2, 9 ) long -> [rbp-50H] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;* V110 tmp107 [V110 ] ( 0, 0 ) long -> zero-ref "field V21._00 (fldOffset=0x0)" P-INDEP
-; V111 tmp108 [V111 ] ( 2, 5 ) long -> [rbp-58H] do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-; V112 tmp109 [V112 ] ( 2, 9 ) long -> [rbp-60H] do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-; V113 tmp110 [V113,T21] ( 2, 2 ) long -> rax "field V32._00 (fldOffset=0x0)" P-INDEP
-; V114 tmp111 [V114,T22] ( 2, 2 ) long -> rcx "field V33._00 (fldOffset=0x0)" P-INDEP
-;* V115 tmp112 [V115 ] ( 0, 0 ) long -> zero-ref "field V34._00 (fldOffset=0x0)" P-INDEP
-; V116 tmp113 [V116 ] ( 2, 5 ) long -> [rbp-88H] do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-; V117 tmp114 [V117 ] ( 2, 9 ) long -> [rbp-90H] do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;* V118 tmp115 [V118 ] ( 0, 0 ) long -> zero-ref "field V43._00 (fldOffset=0x0)" P-INDEP
-; V119 tmp116 [V119 ] ( 2, 5 ) long -> [rbp-98H] do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-; V120 tmp117 [V120 ] ( 2, 9 ) long -> [rbp-A0H] do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-; V121 tmp118 [V121,T23] ( 2, 2 ) long -> rax "field V59._00 (fldOffset=0x0)" P-INDEP
-; V122 tmp119 [V122,T24] ( 2, 2 ) long -> rcx "field V60._00 (fldOffset=0x0)" P-INDEP
-;* V123 tmp120 [V123 ] ( 0, 0 ) long -> zero-ref "field V61._00 (fldOffset=0x0)" P-INDEP
-; V124 tmp121 [V124 ] ( 2, 5 ) long -> [rbp-E8H] do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
-; V125 tmp122 [V125 ] ( 2, 9 ) long -> [rbp-F0H] do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;* V126 tmp123 [V126 ] ( 0, 0 ) long -> zero-ref "field V70._00 (fldOffset=0x0)" P-INDEP
-; V127 tmp124 [V127 ] ( 2, 5 ) long -> [rbp-F8H] do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-; V128 tmp125 [V128 ] ( 2, 9 ) long -> [rbp-100H] do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-; V129 tmp126 [V129,T25] ( 2, 2 ) long -> rax "field V81._00 (fldOffset=0x0)" P-INDEP
-; V130 tmp127 [V130,T26] ( 2, 2 ) long -> rcx "field V82._00 (fldOffset=0x0)" P-INDEP
-;* V131 tmp128 [V131 ] ( 0, 0 ) long -> zero-ref "field V83._00 (fldOffset=0x0)" P-INDEP
-; V132 tmp129 [V132 ] ( 2, 5 ) long -> [rbp-128H] do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-; V133 tmp130 [V133 ] ( 2, 9 ) long -> [rbp-130H] do-not-enreg[X] addr-exposed "field V86._00 (fldOffset=0x0)" P-DEP
-;* V134 tmp131 [V134 ] ( 0, 0 ) long -> zero-ref "field V92._00 (fldOffset=0x0)" P-INDEP
-; V135 tmp132 [V135 ] ( 2, 5 ) long -> [rbp-138H] do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-; V136 tmp133 [V136 ] ( 2, 9 ) long -> [rbp-140H] do-not-enreg[X] addr-exposed "field V95._00 (fldOffset=0x0)" P-DEP
-; V137 cse0 [V137,T00] ( 9, 33 ) int -> rsi "CSE - aggressive"
+; V04 tmp1 [V04,T01] ( 3, 6 ) int -> rax "fgMakeTemp is creating a new local variable"
+; V05 tmp2 [V05,T03] ( 2, 4 ) int -> rsi "fgMakeTemp is creating a new local variable"
+; V06 tmp3 [V06,T05] ( 2, 4 ) simd32 -> mm0 "Inlining Arg"
+; V07 tmp4 [V07,T06] ( 2, 4 ) simd32 -> mm1 "Inlining Arg"
+; V08 tmp5 [V08,T07] ( 3, 3 ) simd64 -> [rsp+00H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
+; V09 cse0 [V09,T08] ( 3, 3 ) simd32 -> mm1 "CSE - aggressive"
+; V10 cse1 [V10,T04] ( 3, 3 ) int -> rsi "CSE - aggressive"
;
-; Lcl frame size = 432
+; Lcl frame size = 120
G_M22707_IG01:
- push rbp
- sub rsp, 432
+ sub rsp, 120
vzeroupper
- lea rbp, [rsp+1B0H]
- ;; size=19 bbWeight=1 PerfScore 2.75
+ ;; size=7 bbWeight=1 PerfScore 1.25
G_M22707_IG02:
- vmovups ymm0, ymmword ptr [rbp+10H]
- vmovups ymmword ptr [rbp-30H], ymm0
- vmovups xmm0, xmmword ptr [rbp-30H]
- vmovaps xmmword ptr [rbp-40H], xmm0
- mov rax, qword ptr [rbp-40H]
- mov qword ptr [rbp-50H], rax
- xor eax, eax
+ vmovups ymm0, ymmword ptr [rsp+80H]
and esi, 7
- align [0 bytes for IG03]
- ;; size=33 bbWeight=1 PerfScore 11.50
-G_M22707_IG03:
- lea rcx, bword ptr [rbp-50H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-48H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG03
- ;; size=38 bbWeight=4 PerfScore 27.00
-G_M22707_IG04:
- mov rax, qword ptr [rbp-48H]
- mov rcx, qword ptr [rbp-38H]
- mov qword ptr [rbp-60H], rcx
- xor ecx, ecx
- align [0 bytes for IG05]
- ;; size=14 bbWeight=1 PerfScore 3.25
-G_M22707_IG05:
- lea rdx, bword ptr [rbp-60H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-58H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG05
- ;; size=39 bbWeight=4 PerfScore 27.00
-G_M22707_IG06:
- mov rcx, qword ptr [rbp-58H]
- mov qword ptr [rbp-70H], rax
- mov qword ptr [rbp-68H], rcx
- vmovaps xmm0, xmmword ptr [rbp-70H]
- vmovups xmm1, xmmword ptr [rbp-20H]
- vmovaps xmmword ptr [rbp-80H], xmm1
- mov rax, qword ptr [rbp-80H]
- mov qword ptr [rbp-90H], rax
- xor eax, eax
- align [0 bytes for IG07]
- ;; size=40 bbWeight=1 PerfScore 12.25
-G_M22707_IG07:
- lea rcx, bword ptr [rbp-90H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-88H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG07
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG08:
- mov rax, qword ptr [rbp-88H]
- mov rcx, qword ptr [rbp-78H]
- mov qword ptr [rbp-A0H], rcx
- xor ecx, ecx
- align [0 bytes for IG09]
- ;; size=20 bbWeight=1 PerfScore 3.25
-G_M22707_IG09:
- lea rdx, bword ptr [rbp-A0H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-98H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG09
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG10:
- mov rcx, qword ptr [rbp-98H]
- mov qword ptr [rbp-B0H], rax
- mov qword ptr [rbp-A8H], rcx
- vinserti128 ymm0, ymm0, xmmword ptr [rbp-B0H], 1
- vmovups ymm1, ymmword ptr [rbp+30H]
- vmovups ymmword ptr [rbp-D0H], ymm1
- vmovups xmm1, xmmword ptr [rbp-D0H]
- vmovaps xmmword ptr [rbp-E0H], xmm1
- mov rax, qword ptr [rbp-E0H]
- mov qword ptr [rbp-F0H], rax
- xor eax, eax
- align [0 bytes for IG11]
- ;; size=76 bbWeight=1 PerfScore 18.25
-G_M22707_IG11:
- lea rcx, bword ptr [rbp-F0H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-E8H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG11
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG12:
- mov rax, qword ptr [rbp-E8H]
- mov rcx, qword ptr [rbp-D8H]
- mov qword ptr [rbp-100H], rcx
- xor ecx, ecx
- align [0 bytes for IG13]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M22707_IG13:
- lea rdx, bword ptr [rbp-100H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-F8H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG13
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG14:
- mov rcx, qword ptr [rbp-F8H]
- mov qword ptr [rbp-110H], rax
- mov qword ptr [rbp-108H], rcx
- vmovaps xmm1, xmmword ptr [rbp-110H]
- vmovups xmm2, xmmword ptr [rbp-C0H]
- vmovaps xmmword ptr [rbp-120H], xmm2
- mov rax, qword ptr [rbp-120H]
- mov qword ptr [rbp-130H], rax
- xor eax, eax
- align [3 bytes for IG15]
- ;; size=64 bbWeight=1 PerfScore 12.50
-G_M22707_IG15:
- lea rcx, bword ptr [rbp-130H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movsx rcx, cl
- lea rdx, bword ptr [rbp-128H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M22707_IG15
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M22707_IG16:
- mov rax, qword ptr [rbp-128H]
- mov rcx, qword ptr [rbp-118H]
- mov qword ptr [rbp-140H], rcx
- xor ecx, ecx
- align [0 bytes for IG17]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M22707_IG17:
- lea rdx, bword ptr [rbp-140H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movsx rdx, dl
- lea r8, bword ptr [rbp-138H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M22707_IG17
- ;; size=45 bbWeight=4 PerfScore 27.00
-G_M22707_IG18:
- mov rcx, qword ptr [rbp-138H]
- mov qword ptr [rbp-150H], rax
- mov qword ptr [rbp-148H], rcx
- vinserti128 ymm1, ymm1, xmmword ptr [rbp-150H], 1
- vmovups ymmword ptr [rbp-1B0H], ymm0
- vmovups ymmword ptr [rbp-190H], ymm1
- vmovups zmm0, zmmword ptr [rbp-1B0H]
+ mov eax, esi
+ vmovd xmm1, rax
+ vpsrld ymm0, ymm0, ymm1
+ mov ecx, 255
+ shrx eax, ecx, eax
+ vpbroadcastb ymm1, eax
+ vpand ymm0, ymm0, ymm1
+ vmovups ymm2, ymmword ptr [rsp+A0H]
+ vmovd xmm3, rsi
+ vpsrld ymm2, ymm2, ymm3
+ vpand ymm1, ymm2, ymm1
+ vmovups ymmword ptr [rsp], ymm0
+ vmovups ymmword ptr [rsp+20H], ymm1
+ vmovups zmm0, zmmword ptr [rsp]
vmovups zmmword ptr [rdi], zmm0
mov rax, rdi
- ;; size=66 bbWeight=1 PerfScore 14.25
-G_M22707_IG19:
+ ;; size=90 bbWeight=1 PerfScore 29.17
+G_M22707_IG03:
vzeroupper
- add rsp, 432
- pop rbp
+ add rsp, 120
ret
- ;; size=12 bbWeight=1 PerfScore 2.75
+ ;; size=8 bbWeight=1 PerfScore 2.25
-; Total bytes of code 734, prolog size 19, PerfScore 376.65, instruction count 166, allocated bytes for code 734 (MethodHash=eab6a74c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)
+; Total bytes of code 105, prolog size 7, PerfScore 43.17, instruction count 23, allocated bytes for code 105 (MethodHash=eab6a74c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[byte],int):System.Runtime.Intrinsics.Vector512`1[byte] (FullOpts)

-625 (-85.62 % of base) - System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte]

; Assembly listing for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
; Emitting BLENDED_CODE for X64 with AVX512 - Unix
; FullOpts code
; optimized code
-; rbp based frame
-; fully interruptible
+; rsp based frame
+; partially interruptible
; No PGO data
-; 0 inlinees with PGO data; 48 single block inlinees; 27 inlinees without PGO data
+; 0 inlinees with PGO data; 4 single block inlinees; 1 inlinees without PGO data
; Final local variable assignments
;
-; V00 RetBuf [V00,T17] ( 4, 4 ) byref -> rdi single-def
-; V01 arg0 [V01,T42] ( 2, 2 ) simd64 -> [rbp+10H] do-not-enreg[SF] single-def
-; V02 arg1 [V02,T18] ( 3, 3 ) int -> rsi single-def
+; V00 RetBuf [V00,T00] ( 4, 4 ) byref -> rdi single-def
+; V01 arg0 [V01,T09] ( 2, 2 ) simd64 -> [rsp+80H] do-not-enreg[SF] single-def
+; V02 arg1 [V02,T02] ( 3, 3 ) int -> rsi single-def
;# V03 OutArgs [V03 ] ( 1, 1 ) struct ( 0) [rsp+00H] do-not-enreg[XS] addr-exposed "OutgoingArgSpace"
-; V04 tmp1 [V04,T33] ( 2, 4 ) simd32 -> mm0 "impAppendStmt"
-; V05 tmp2 [V05,T34] ( 2, 4 ) simd32 -> mm1 "spilled call-like call argument"
-; V06 tmp3 [V06,T27] ( 3, 6 ) simd32 -> [rbp-30H] do-not-enreg[SF] "Inlining Arg"
-; V07 tmp4 [V07,T35] ( 2, 4 ) simd16 -> mm0 "impAppendStmt"
-;* V08 tmp5 [V08 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V09 tmp6 [V09,T28] ( 3, 6 ) simd16 -> [rbp-40H] do-not-enreg[SF] "Inlining Arg"
-;* V10 tmp7 [V10 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V11 tmp8 [V11 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V12 tmp9 [V12 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V13 tmp10 [V13 ] ( 2, 5 ) struct ( 8) [rbp-48H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V14 tmp11 [V14,T01] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V15 tmp12 [V15 ] ( 2, 10 ) struct ( 8) [rbp-50H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V16 tmp13 [V16 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V17 tmp14 [V17 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V18 tmp15 [V18 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V19 tmp16 [V19,T09] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V20 tmp17 [V20 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V21 tmp18 [V21 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V22 tmp19 [V22 ] ( 2, 5 ) struct ( 8) [rbp-58H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V23 tmp20 [V23,T02] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V24 tmp21 [V24 ] ( 2, 10 ) struct ( 8) [rbp-60H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V25 tmp22 [V25 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V26 tmp23 [V26 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V27 tmp24 [V27 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V28 tmp25 [V28,T10] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V29 tmp26 [V29 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V30 tmp27 [V30,T37] ( 3, 3 ) simd16 -> [rbp-70H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V31 tmp28 [V31,T29] ( 3, 6 ) simd16 -> [rbp-80H] do-not-enreg[SF] "Inlining Arg"
-;* V32 tmp29 [V32 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V33 tmp30 [V33 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V34 tmp31 [V34 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V35 tmp32 [V35 ] ( 2, 5 ) struct ( 8) [rbp-88H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V36 tmp33 [V36,T03] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V37 tmp34 [V37 ] ( 2, 10 ) struct ( 8) [rbp-90H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V38 tmp35 [V38 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V39 tmp36 [V39 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V40 tmp37 [V40 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V41 tmp38 [V41,T11] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V42 tmp39 [V42 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V43 tmp40 [V43 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V44 tmp41 [V44 ] ( 2, 5 ) struct ( 8) [rbp-98H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V45 tmp42 [V45,T04] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V46 tmp43 [V46 ] ( 2, 10 ) struct ( 8) [rbp-A0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V47 tmp44 [V47 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V48 tmp45 [V48 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V49 tmp46 [V49 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V50 tmp47 [V50,T12] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V51 tmp48 [V51 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V52 tmp49 [V52,T38] ( 3, 3 ) simd16 -> [rbp-B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V53 tmp50 [V53 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V54 tmp51 [V54 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V55 tmp52 [V55,T30] ( 3, 6 ) simd32 -> [rbp-D0H] do-not-enreg[SF] "Inlining Arg"
-; V56 tmp53 [V56,T36] ( 2, 4 ) simd16 -> mm1 "impAppendStmt"
-;* V57 tmp54 [V57 ] ( 0, 0 ) simd16 -> zero-ref "spilled call-like call argument"
-; V58 tmp55 [V58,T31] ( 3, 6 ) simd16 -> [rbp-E0H] do-not-enreg[SF] "Inlining Arg"
-;* V59 tmp56 [V59 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V60 tmp57 [V60 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V61 tmp58 [V61 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V62 tmp59 [V62 ] ( 2, 5 ) struct ( 8) [rbp-E8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V63 tmp60 [V63,T05] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V64 tmp61 [V64 ] ( 2, 10 ) struct ( 8) [rbp-F0H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V65 tmp62 [V65 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V66 tmp63 [V66 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V67 tmp64 [V67 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V68 tmp65 [V68,T13] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V69 tmp66 [V69 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V70 tmp67 [V70 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V71 tmp68 [V71 ] ( 2, 5 ) struct ( 8) [rbp-F8H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V72 tmp69 [V72,T06] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V73 tmp70 [V73 ] ( 2, 10 ) struct ( 8) [rbp-100H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V74 tmp71 [V74 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V75 tmp72 [V75 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V76 tmp73 [V76 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V77 tmp74 [V77,T14] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V78 tmp75 [V78 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V79 tmp76 [V79,T39] ( 3, 3 ) simd16 -> [rbp-110H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V80 tmp77 [V80,T32] ( 3, 6 ) simd16 -> [rbp-120H] do-not-enreg[SF] "Inlining Arg"
-;* V81 tmp78 [V81 ] ( 0, 0 ) struct ( 8) zero-ref "impAppendStmt"
-;* V82 tmp79 [V82 ] ( 0, 0 ) struct ( 8) zero-ref "spilled call-like call argument"
-;* V83 tmp80 [V83 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V84 tmp81 [V84 ] ( 2, 5 ) struct ( 8) [rbp-128H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V85 tmp82 [V85,T07] ( 6, 21 ) int -> rax "Inline stloc first use temp"
-; V86 tmp83 [V86 ] ( 2, 10 ) struct ( 8) [rbp-130H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V87 tmp84 [V87 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V88 tmp85 [V88 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V89 tmp86 [V89 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V90 tmp87 [V90,T15] ( 2, 8 ) ubyte -> rcx "Inline return value spill temp"
-;* V91 tmp88 [V91 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-;* V92 tmp89 [V92 ] ( 0, 0 ) struct ( 8) zero-ref "Inlining Arg"
-; V93 tmp90 [V93 ] ( 2, 5 ) struct ( 8) [rbp-138H] do-not-enreg[XS] addr-exposed ld-addr-op "Inline ldloca(s) first use temp"
-; V94 tmp91 [V94,T08] ( 6, 21 ) int -> rcx "Inline stloc first use temp"
-; V95 tmp92 [V95 ] ( 2, 10 ) struct ( 8) [rbp-140H] do-not-enreg[XS] addr-exposed ld-addr-op "Inlining Arg"
-;* V96 tmp93 [V96 ] ( 0, 0 ) ubyte -> zero-ref "Inline stloc first use temp"
-;* V97 tmp94 [V97 ] ( 0, 0 ) ubyte -> zero-ref "Inline return value spill temp"
-;* V98 tmp95 [V98 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V99 tmp96 [V99,T16] ( 2, 8 ) ubyte -> rdx "Inline return value spill temp"
-;* V100 tmp97 [V100 ] ( 0, 0 ) byref -> zero-ref "Inline stloc first use temp"
-; V101 tmp98 [V101,T40] ( 3, 3 ) simd16 -> [rbp-150H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-;* V102 tmp99 [V102 ] ( 0, 0 ) simd32 -> zero-ref "Inline return value spill temp"
-;* V103 tmp100 [V103 ] ( 0, 0 ) simd32 -> zero-ref "Inline stloc first use temp"
-; V104 tmp101 [V104,T41] ( 3, 3 ) simd64 -> [rbp-1B0H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
-; V105 tmp102 [V105,T19] ( 2, 2 ) long -> rax "field V10._00 (fldOffset=0x0)" P-INDEP
-; V106 tmp103 [V106,T20] ( 2, 2 ) long -> rcx "field V11._00 (fldOffset=0x0)" P-INDEP
-;* V107 tmp104 [V107 ] ( 0, 0 ) long -> zero-ref "field V12._00 (fldOffset=0x0)" P-INDEP
-; V108 tmp105 [V108 ] ( 2, 5 ) long -> [rbp-48H] do-not-enreg[X] addr-exposed "field V13._00 (fldOffset=0x0)" P-DEP
-; V109 tmp106 [V109 ] ( 2, 9 ) long -> [rbp-50H] do-not-enreg[X] addr-exposed "field V15._00 (fldOffset=0x0)" P-DEP
-;* V110 tmp107 [V110 ] ( 0, 0 ) long -> zero-ref "field V21._00 (fldOffset=0x0)" P-INDEP
-; V111 tmp108 [V111 ] ( 2, 5 ) long -> [rbp-58H] do-not-enreg[X] addr-exposed "field V22._00 (fldOffset=0x0)" P-DEP
-; V112 tmp109 [V112 ] ( 2, 9 ) long -> [rbp-60H] do-not-enreg[X] addr-exposed "field V24._00 (fldOffset=0x0)" P-DEP
-; V113 tmp110 [V113,T21] ( 2, 2 ) long -> rax "field V32._00 (fldOffset=0x0)" P-INDEP
-; V114 tmp111 [V114,T22] ( 2, 2 ) long -> rcx "field V33._00 (fldOffset=0x0)" P-INDEP
-;* V115 tmp112 [V115 ] ( 0, 0 ) long -> zero-ref "field V34._00 (fldOffset=0x0)" P-INDEP
-; V116 tmp113 [V116 ] ( 2, 5 ) long -> [rbp-88H] do-not-enreg[X] addr-exposed "field V35._00 (fldOffset=0x0)" P-DEP
-; V117 tmp114 [V117 ] ( 2, 9 ) long -> [rbp-90H] do-not-enreg[X] addr-exposed "field V37._00 (fldOffset=0x0)" P-DEP
-;* V118 tmp115 [V118 ] ( 0, 0 ) long -> zero-ref "field V43._00 (fldOffset=0x0)" P-INDEP
-; V119 tmp116 [V119 ] ( 2, 5 ) long -> [rbp-98H] do-not-enreg[X] addr-exposed "field V44._00 (fldOffset=0x0)" P-DEP
-; V120 tmp117 [V120 ] ( 2, 9 ) long -> [rbp-A0H] do-not-enreg[X] addr-exposed "field V46._00 (fldOffset=0x0)" P-DEP
-; V121 tmp118 [V121,T23] ( 2, 2 ) long -> rax "field V59._00 (fldOffset=0x0)" P-INDEP
-; V122 tmp119 [V122,T24] ( 2, 2 ) long -> rcx "field V60._00 (fldOffset=0x0)" P-INDEP
-;* V123 tmp120 [V123 ] ( 0, 0 ) long -> zero-ref "field V61._00 (fldOffset=0x0)" P-INDEP
-; V124 tmp121 [V124 ] ( 2, 5 ) long -> [rbp-E8H] do-not-enreg[X] addr-exposed "field V62._00 (fldOffset=0x0)" P-DEP
-; V125 tmp122 [V125 ] ( 2, 9 ) long -> [rbp-F0H] do-not-enreg[X] addr-exposed "field V64._00 (fldOffset=0x0)" P-DEP
-;* V126 tmp123 [V126 ] ( 0, 0 ) long -> zero-ref "field V70._00 (fldOffset=0x0)" P-INDEP
-; V127 tmp124 [V127 ] ( 2, 5 ) long -> [rbp-F8H] do-not-enreg[X] addr-exposed "field V71._00 (fldOffset=0x0)" P-DEP
-; V128 tmp125 [V128 ] ( 2, 9 ) long -> [rbp-100H] do-not-enreg[X] addr-exposed "field V73._00 (fldOffset=0x0)" P-DEP
-; V129 tmp126 [V129,T25] ( 2, 2 ) long -> rax "field V81._00 (fldOffset=0x0)" P-INDEP
-; V130 tmp127 [V130,T26] ( 2, 2 ) long -> rcx "field V82._00 (fldOffset=0x0)" P-INDEP
-;* V131 tmp128 [V131 ] ( 0, 0 ) long -> zero-ref "field V83._00 (fldOffset=0x0)" P-INDEP
-; V132 tmp129 [V132 ] ( 2, 5 ) long -> [rbp-128H] do-not-enreg[X] addr-exposed "field V84._00 (fldOffset=0x0)" P-DEP
-; V133 tmp130 [V133 ] ( 2, 9 ) long -> [rbp-130H] do-not-enreg[X] addr-exposed "field V86._00 (fldOffset=0x0)" P-DEP
-;* V134 tmp131 [V134 ] ( 0, 0 ) long -> zero-ref "field V92._00 (fldOffset=0x0)" P-INDEP
-; V135 tmp132 [V135 ] ( 2, 5 ) long -> [rbp-138H] do-not-enreg[X] addr-exposed "field V93._00 (fldOffset=0x0)" P-DEP
-; V136 tmp133 [V136 ] ( 2, 9 ) long -> [rbp-140H] do-not-enreg[X] addr-exposed "field V95._00 (fldOffset=0x0)" P-DEP
-; V137 cse0 [V137,T00] ( 9, 33 ) int -> rsi "CSE - aggressive"
+; V04 tmp1 [V04,T01] ( 3, 6 ) int -> rax "fgMakeTemp is creating a new local variable"
+; V05 tmp2 [V05,T03] ( 2, 4 ) int -> rsi "fgMakeTemp is creating a new local variable"
+; V06 tmp3 [V06,T05] ( 2, 4 ) simd32 -> mm0 "Inlining Arg"
+; V07 tmp4 [V07,T06] ( 2, 4 ) simd32 -> mm1 "Inlining Arg"
+; V08 tmp5 [V08,T07] ( 3, 3 ) simd64 -> [rsp+00H] do-not-enreg[SF] ld-addr-op "Inline ldloca(s) first use temp"
+; V09 cse0 [V09,T08] ( 3, 3 ) simd32 -> mm1 "CSE - aggressive"
+; V10 cse1 [V10,T04] ( 3, 3 ) int -> rsi "CSE - aggressive"
;
-; Lcl frame size = 432
+; Lcl frame size = 120
G_M33171_IG01:
- push rbp
- sub rsp, 432
+ sub rsp, 120
vzeroupper
- lea rbp, [rsp+1B0H]
- ;; size=19 bbWeight=1 PerfScore 2.75
+ ;; size=7 bbWeight=1 PerfScore 1.25
G_M33171_IG02:
- vmovups ymm0, ymmword ptr [rbp+10H]
- vmovups ymmword ptr [rbp-30H], ymm0
- vmovups xmm0, xmmword ptr [rbp-30H]
- vmovaps xmmword ptr [rbp-40H], xmm0
- mov rax, qword ptr [rbp-40H]
- mov qword ptr [rbp-50H], rax
- xor eax, eax
+ vmovups ymm0, ymmword ptr [rsp+80H]
and esi, 7
- align [0 bytes for IG03]
- ;; size=33 bbWeight=1 PerfScore 11.50
-G_M33171_IG03:
- lea rcx, bword ptr [rbp-50H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-48H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG03
- ;; size=37 bbWeight=4 PerfScore 27.00
-G_M33171_IG04:
- mov rax, qword ptr [rbp-48H]
- mov rcx, qword ptr [rbp-38H]
- mov qword ptr [rbp-60H], rcx
- xor ecx, ecx
- align [0 bytes for IG05]
- ;; size=14 bbWeight=1 PerfScore 3.25
-G_M33171_IG05:
- lea rdx, bword ptr [rbp-60H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-58H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG05
- ;; size=38 bbWeight=4 PerfScore 27.00
-G_M33171_IG06:
- mov rcx, qword ptr [rbp-58H]
- mov qword ptr [rbp-70H], rax
- mov qword ptr [rbp-68H], rcx
- vmovaps xmm0, xmmword ptr [rbp-70H]
- vmovups xmm1, xmmword ptr [rbp-20H]
- vmovaps xmmword ptr [rbp-80H], xmm1
- mov rax, qword ptr [rbp-80H]
- mov qword ptr [rbp-90H], rax
- xor eax, eax
- align [0 bytes for IG07]
- ;; size=40 bbWeight=1 PerfScore 12.25
-G_M33171_IG07:
- lea rcx, bword ptr [rbp-90H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-88H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG07
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG08:
- mov rax, qword ptr [rbp-88H]
- mov rcx, qword ptr [rbp-78H]
- mov qword ptr [rbp-A0H], rcx
- xor ecx, ecx
- align [0 bytes for IG09]
- ;; size=20 bbWeight=1 PerfScore 3.25
-G_M33171_IG09:
- lea rdx, bword ptr [rbp-A0H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-98H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG09
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG10:
- mov rcx, qword ptr [rbp-98H]
- mov qword ptr [rbp-B0H], rax
- mov qword ptr [rbp-A8H], rcx
- vinserti128 ymm0, ymm0, xmmword ptr [rbp-B0H], 1
- vmovups ymm1, ymmword ptr [rbp+30H]
- vmovups ymmword ptr [rbp-D0H], ymm1
- vmovups xmm1, xmmword ptr [rbp-D0H]
- vmovaps xmmword ptr [rbp-E0H], xmm1
- mov rax, qword ptr [rbp-E0H]
- mov qword ptr [rbp-F0H], rax
- xor eax, eax
- align [0 bytes for IG11]
- ;; size=76 bbWeight=1 PerfScore 18.25
-G_M33171_IG11:
- lea rcx, bword ptr [rbp-F0H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-E8H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG11
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG12:
- mov rax, qword ptr [rbp-E8H]
- mov rcx, qword ptr [rbp-D8H]
- mov qword ptr [rbp-100H], rcx
- xor ecx, ecx
- align [0 bytes for IG13]
- ;; size=23 bbWeight=1 PerfScore 3.25
-G_M33171_IG13:
- lea rdx, bword ptr [rbp-100H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-F8H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG13
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG14:
- mov rcx, qword ptr [rbp-F8H]
- mov qword ptr [rbp-110H], rax
- mov qword ptr [rbp-108H], rcx
- vmovaps xmm1, xmmword ptr [rbp-110H]
- vmovups xmm2, xmmword ptr [rbp-C0H]
- vmovaps xmmword ptr [rbp-120H], xmm2
- mov rax, qword ptr [rbp-120H]
- mov qword ptr [rbp-130H], rax
- xor eax, eax
- align [0 bytes for IG15]
- ;; size=61 bbWeight=1 PerfScore 12.25
-G_M33171_IG15:
- lea rcx, bword ptr [rbp-130H]
- movsxd rdx, eax
- movzx rcx, byte ptr [rcx+rdx]
- shrx ecx, ecx, esi
- movzx rcx, cl
- lea rdx, bword ptr [rbp-128H]
- movsxd r8, eax
- mov byte ptr [rdx+r8], cl
- inc eax
- cmp eax, 8
- jl SHORT G_M33171_IG15
- ;; size=43 bbWeight=4 PerfScore 27.00
-G_M33171_IG16:
- mov rax, qword ptr [rbp-128H]
- mov rcx, qword ptr [rbp-118H]
- mov qword ptr [rbp-140H], rcx
- xor ecx, ecx
- align [7 bytes for IG17]
- ;; size=30 bbWeight=1 PerfScore 3.50
-G_M33171_IG17:
- lea rdx, bword ptr [rbp-140H]
- movsxd r8, ecx
- movzx rdx, byte ptr [rdx+r8]
- shrx edx, edx, esi
- movzx rdx, dl
- lea r8, bword ptr [rbp-138H]
- movsxd r9, ecx
- mov byte ptr [r8+r9], dl
- inc ecx
- cmp ecx, 8
- jl SHORT G_M33171_IG17
- ;; size=44 bbWeight=4 PerfScore 27.00
-G_M33171_IG18:
- mov rcx, qword ptr [rbp-138H]
- mov qword ptr [rbp-150H], rax
- mov qword ptr [rbp-148H], rcx
- vinserti128 ymm1, ymm1, xmmword ptr [rbp-150H], 1
- vmovups ymmword ptr [rbp-1B0H], ymm0
- vmovups ymmword ptr [rbp-190H], ymm1
- vmovups zmm0, zmmword ptr [rbp-1B0H]
+ mov eax, esi
+ vmovd xmm1, rax
+ vpsrld ymm0, ymm0, ymm1
+ mov ecx, 255
+ shrx eax, ecx, eax
+ vpbroadcastb ymm1, eax
+ vpand ymm0, ymm0, ymm1
+ vmovups ymm2, ymmword ptr [rsp+A0H]
+ vmovd xmm3, rsi
+ vpsrld ymm2, ymm2, ymm3
+ vpand ymm1, ymm2, ymm1
+ vmovups ymmword ptr [rsp], ymm0
+ vmovups ymmword ptr [rsp+20H], ymm1
+ vmovups zmm0, zmmword ptr [rsp]
vmovups zmmword ptr [rdi], zmm0
mov rax, rdi
- ;; size=66 bbWeight=1 PerfScore 14.25
-G_M33171_IG19:
+ ;; size=90 bbWeight=1 PerfScore 29.17
+G_M33171_IG03:
vzeroupper
- add rsp, 432
- pop rbp
+ add rsp, 120
ret
- ;; size=12 bbWeight=1 PerfScore 2.75
+ ;; size=8 bbWeight=1 PerfScore 2.25
-; Total bytes of code 730, prolog size 19, PerfScore 376.25, instruction count 166, allocated bytes for code 730 (MethodHash=ecb57e6c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts)
+; Total bytes of code 105, prolog size 7, PerfScore 43.17, instruction count 23, allocated bytes for code 105 (MethodHash=ecb57e6c) for method System.Runtime.Intrinsics.Vector512:ShiftRightLogical(System.Runtime.Intrinsics.Vector512`1[ubyte],int):System.Runtime.Intrinsics.Vector512`1[ubyte] (FullOpts) Full list of diffs: https://gist.github.com/MihuBot/76bbbe3b981db9de6c49786af2658ac2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Build completed in 1 hour 28 minutes.
dotnet/runtime#86841
CoreLib diffs
Frameworks diffs
Diffs
Artifacts:
The text was updated successfully, but these errors were encountered: