-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Set DOES_NOT_RETURN gtCallMoreFlags in fgFindJumpTargets #6742
Conversation
I'm not sure what you are trying to do. It's So, we would probably need to move return counting to And even if we do that there's still the possibility to find more returns than there are because |
Adding the check to
But does produce the moved calls: ; Assembly listing for method ArraySegment`1:.ctor(ref,int,int):this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 this [V00,T00] ( 5, 5 ) byref -> rsi this
;* V01 TypeCtx [V01 ] ( 0, 0 ) long -> zero-ref
; V02 arg1 [V02,T01] ( 5, 5 ) ref -> r8
; V03 arg2 [V03,T02] ( 5, 5 ) int -> rdi
; V04 arg3 [V04,T03] ( 3, 3 ) int -> rbx
; V05 loc0 [V05 ] ( 1, 1 ) lclBlk (32) [rsp+0x00]
;
; Lcl frame size = 32
G_M41507_IG01:
push rdi
push rsi
push rbx
sub rsp, 32
mov rsi, rcx
mov edi, r9d
mov ebx, dword ptr [rsp+60H]
G_M41507_IG02:
test r8, r8
je G_M41507_IG08
G_M41507_IG03:
test edi, edi
jl G_M41507_IG09
G_M41507_IG04:
test ebx, ebx
jl G_M41507_IG10
G_M41507_IG05:
mov edx, dword ptr [r8+8]
sub edx, edi
cmp edx, ebx
jl G_M41507_IG11
G_M41507_IG06:
mov rcx, rsi
mov rdx, r8
call CORINFO_HELP_CHECKED_ASSIGN_REF
mov dword ptr [rsi+8], edi
mov dword ptr [rsi+12], ebx
G_M41507_IG07:
add rsp, 32
pop rbx
pop rsi
pop rdi
ret
************** Beginning of cold code **************
G_M41507_IG08:
mov ecx, 3
call ThrowHelper:ThrowArgumentNullException(int)
G_M41507_IG09:
mov ecx, 41
mov edx, 4
call ThrowHelper:ThrowArgumentOutOfRangeException(int,int)
G_M41507_IG10:
mov ecx, 16
mov edx, 4
call ThrowHelper:ThrowArgumentOutOfRangeException(int,int)
G_M41507_IG11:
mov ecx, 23
call ThrowHelper:ThrowArgumentException(int)
int3
; Total bytes of code 132, prolog size 7 for method ArraySegment`1:.ctor(ref,int,int):this Will clean it up, and add commit. |
@mikedn did you mean something like that? |
With #6634
Changes to
|
With current branch
|
With ; Assembly listing for method List`1:CopyTo(int,ref,int,int):this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 this [V00,T00] ( 4, 4 ) ref -> rsi this
; V01 arg1 [V01,T01] ( 4, 4 ) int -> rdi
; V02 arg2 [V02,T02] ( 3, 3 ) ref -> rbx
; V03 arg3 [V03,T03] ( 3, 3 ) int -> rbp
; V04 arg4 [V04,T05] ( 2, 2 ) int -> r14
; V05 tmp0 [V05,T04] ( 2, 4 ) ref -> rcx
; V06 OutArgs [V06 ] ( 1, 1 ) lclBlk (48) [rsp+0x00]
;
; Lcl frame size = 48
G_M45633_IG01:
push r14
push rdi
push rsi
push rbp
push rbx
sub rsp, 48
mov rsi, rcx
mov edi, edx
mov rbx, r8
mov ebp, r9d
mov r14d, dword ptr [rsp+80H]
G_M45633_IG02:
mov ecx, dword ptr [rsi+24]
sub ecx, edi
cmp ecx, r14d
jge SHORT G_M45633_IG03
mov ecx, 23
call ThrowHelper:ThrowArgumentException(int)
G_M45633_IG03:
mov rcx, gword ptr [rsi+8]
mov dword ptr [rsp+20H], r14d
xor edx, edx
mov dword ptr [rsp+28H], edx
mov edx, edi
mov r8, rbx
mov r9d, ebp
call Array:Copy(ref,int,ref,int,int,bool)
nop
G_M45633_IG04:
add rsp, 48
pop rbx
pop rbp
pop rsi
pop rdi
pop r14
ret
; Total bytes of code 89, prolog size 10 for method List`1:CopyTo(int,ref,int,int):this To ; Assembly listing for method List`1:CopyTo(int,ref,int,int):this
; Emitting BLENDED_CODE for X64 CPU with SSE2
; optimized code
; rsp based frame
; partially interruptible
; Final local variable assignments
;
; V00 this [V00,T00] ( 4, 4 ) ref -> rcx this
; V01 arg1 [V01,T01] ( 4, 4 ) int -> rdx
; V02 arg2 [V02,T02] ( 3, 3 ) ref -> r8
; V03 arg3 [V03,T03] ( 3, 3 ) int -> r9
; V04 arg4 [V04,T05] ( 2, 2 ) int -> rax
; V05 tmp0 [V05,T04] ( 2, 4 ) ref -> rcx
; V06 OutArgs [V06 ] ( 1, 1 ) lclBlk (48) [rsp+0x00]
;
; Lcl frame size = 56
G_M45633_IG01:
sub rsp, 56
nop
mov eax, dword ptr [rsp+60H]
G_M45633_IG02:
mov r10d, dword ptr [rcx+24]
sub r10d, edx
cmp r10d, eax
jl G_M45633_IG05
G_M45633_IG03:
mov rcx, gword ptr [rcx+8]
mov dword ptr [rsp+20H], eax
xor eax, eax
mov dword ptr [rsp+28H], eax
call Array:Copy(ref,int,ref,int,int,bool)
nop
G_M45633_IG04:
add rsp, 56
ret
************** Beginning of cold code **************
G_M45633_IG05:
mov ecx, 23
call ThrowHelper:ThrowArgumentException(int)
int3
; Total bytes of code 61, prolog size 5 for method List`1:CopyTo(int,ref,int,int):this
|
Yes, that's normal. Currently the reason why inlining fails is irrelevant, it can be "no return" or any other. What matters is marking the call node as "no return". |
@@ -4857,7 +4859,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, BYTE* | |||
fgObserveInlineConstants(opcode, pushedStack, isInlining); | |||
} | |||
break; | |||
|
|||
case CEE_RET: | |||
retBlocks++; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As already mentioned, this is not enough. We need to handle tail calls as well, they count as returns.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is always including CEE_JMP
enough? (made change)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nope, CEE_JMP
is needed but we also need to look for calls with a tailcall prefix.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well out of my depth now... rename to retBlocks
->potentialRetBlocks
and include CEE_TAILCALL
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Or does the called function need to be checked?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Note sure, need to look more at what fgMakeBasicBlocks
is doing. Maybe we can actually ignore the .tail
prefix, the IL code should contain a ret
anyway for the method to be valid.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, there has to be a ret
after a tail.call
. If there's no ret
then the JIT fails to compile the method so we don't need to be concerned about what happens if someone feeds the JIT IL code that fails to conform to the ECMA spec. An invalid method would be flagged as "no return" and that's OK, calling such a method would result in an InvalidProgramException
being thrown.
So counting CEE_RET
and CEE_JMP
is enough, if there aren't any then the method doesn't return (either because it throws or because it contains an infinite loop).
Heh, that |
Reverted to only make the observations when doing inlining observations; and seems to produce the same outcomes. |
The outcomes are the same with the later changes (so List1:CopyTo is still much improved); it doesn't seem to precompile
Lot of repeats of the same block in the cold code in new (no cold code in old) |
Yeah, I've seen that monster before 😄 |
d44d011
to
c65d749
Compare
Squashed... @mikedn I assume deduping the cold chunks wouldn't be straightforward? Would be nice to convert the 30 identical ones to 1... |
|
Think will leave alone... |
I don't think there's anything in the JIT that could do that at the moment. Normally it would be relatively expensive to do as the compiler would need to compare the blocks to figure out that they are identical. But given in this particular case it would be easy to tell that they are identical - they come from the same method and same IL offset. Anyway, this is an entirely different story.
Tail duplication is sort of the opposite - if there are multiple jumps to the same block then the jumps are replaced with the block's code. This eliminates the jumps at the cost of increased code size so it's done only when the block's code is small. |
// Mark the call node as "no return" as it can impact caller's code quality. | ||
impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; | ||
|
||
if (compInlineResult->IsFailure()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The IsFailure
check should come after NoteBool
, otherwise it is useless.
Considering that CALLEE_DOES_NOT_RETURN
doesn't currently cause inlining to be aborted right away it might be best to leave this stuff out, the existing "no return" code will do this anyway. All we really need here is to mark the call node as "no return".
Also, it might be good to add an assert in the existing "no return" code to ensure that if fgFindJumpTargets
marked the call as "no return" then there really are no BBJ_RETURN
blocks in the method.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can add an assert in EnhancedLegacyPolicy::NoteBool
, but ideally would check it immediately after fgMakeBasicBlocks
, but not sure I can inspect the observation there?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can add an assert in EnhancedLegacyPolicy::NoteBool
Note there, the assert should be in fgFindBasicBlocks where the rest of the no return code is.
but not sure I can inspect the observation there
This has nothing to do with the inline observation, it has to do with the call flag.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Something like?
unsigned retBlocks = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
noway_assert(retBlocks > 0 && ((impInlineInfo->iciCall->gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0));
// Mark the call node as "no return" as it can impact caller's code quality. | ||
impInlineInfo->iciCall->gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN; | ||
} | ||
compInlineResult->NoteBool(InlineObservation::CALLEE_DOES_NOT_RETURN, true); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not needed.
4ac938c
to
9fa2dca
Compare
@dotnet/jit-contrib PTAL |
Renamed both issue and PR to be more functionally descriptive. Seem ok? |
"prejit" is ngen/crossgen.
👍 |
Not sure why it segfaults on Ubuntu/osx checked and CentOS7.1 debug, guessing that would suggests Or something completely unrealted... |
Yep, It's strange that this doesn't crash the Windows builds as well. |
@dotnet-bot test Windows_NT x64 Release Priority 1 Build and Test |
Merge clash with #6793 |
@mikedn got there in the end, thank you for all your help 😄 |
@erozenfeld we had clash in |
|
||
if (compIsForInlining()) | ||
{ | ||
if (compInlineResult->IsFailure()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Uses fail after setting no return flag
It's probably best to leave that conditional return where it was for the sake of clarity, it communicates the fact that We could remove the conditional return that's after the |
@erozenfeld @mikedn added the fail back |
@dotnet-bot test Windows_NT x64 Debug Build and Test |
@AndyAyersMS any thoughts about this? Most of the major items it effects have been merged:
|
@AndyAyersMS is out at the moment--@kyulee1, can you take a look? |
d1d4093
to
7cb8db3
Compare
Rebased |
Hmmm.. this doesn't seem to work anymore and leaves |
Replacing with #7580 |
Adds
gtCallMoreFlags |= GTF_CALL_M_DOES_NOT_RETURN
tofgFindJumpTargets
which happens in early in pipeline (which may mark the method as uninlinable, so prevent the later setting infgFindBasicBlocks
).Count the BBJ_RETURNs in
fgMakeBasicBlocks
rather than recounting afterwards (perf)With current branch -0.88 % diff improvement
With #6634 from -1.44% to -2.49%
Resolves https://github.com/dotnet/coreclr/issues/6744
@mikedn @AndyAyersMS @kyulee1