Skip to content

[AArch64][SME] Fix generating incorrect TBZ when lowering lazy save. #68429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1005,10 +1005,12 @@ AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
// expected value for the callee (0 for a normal callee and 1 for a streaming
// callee).
auto PStateSM = MI.getOperand(2).getReg();
+ auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
+ unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
bool IsStreamingCallee = MI.getOperand(3).getImm();
- unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
+ unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
MachineInstrBuilder Tbx =
- BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
+ BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

// Split MBB and create two new blocks:
// - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ define void @streaming_compatible() #0 {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB0_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: bl non_streaming
- ; CHECK-NEXT: tbz x19, #0, .LBB0_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_4:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Original file line number Diff line number Diff line change
Expand Up @@ -413,14 +413,14 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: and x19, x0, #0x1
- ; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
+ ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: .LBB12_2:
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
- ; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
+ ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_4
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -134,12 +134,12 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_z
; CHECK-NEXT: msr TPIDR2_EL0, x9
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB3_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB3_2:
; CHECK-NEXT: bl private_za_callee
- ; CHECK-NEXT: tbz x19, #0, .LBB3_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB3_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB3_4:
Expand Down Expand Up @@ -187,12 +187,12 @@ define void @za_shared_caller_za_preserved_callee() nounwind "aarch64_pstate_za_
; CHECK-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB4_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
; CHECK-NEXT: bl private_za_preserved_callee
- ; CHECK-NEXT: tbz x19, #0, .LBB4_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,12 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB1_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB1_2:
; CHECK-NEXT: bl normal_callee
- ; CHECK-NEXT: tbz x19, #0, .LBB1_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB1_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB1_4:
Expand Down Expand Up @@ -79,12 +79,12 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbnz x19, #0, .LBB2_2
+ ; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2:
; CHECK-NEXT: bl streaming_callee
- ; CHECK-NEXT: tbnz x19, #0, .LBB2_4
+ ; CHECK-NEXT: tbnz w19, #0, .LBB2_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB2_4:
Expand Down Expand Up @@ -134,7 +134,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB4_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB4_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB4_2:
Expand All @@ -143,7 +143,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: bl normal_callee_vec_arg
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
- ; CHECK-NEXT: tbz x19, #0, .LBB4_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB4_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB4_4:
Expand Down Expand Up @@ -204,14 +204,14 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB5_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: bl normal_callee_scalable_vec_arg
; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill
- ; CHECK-NEXT: tbz x19, #0, .LBB5_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
Expand Down Expand Up @@ -296,14 +296,14 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB6_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB6_2:
; CHECK-NEXT: ldr p0, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: bl normal_callee_predicate_vec_arg
; CHECK-NEXT: str p0, [sp, #6, mul vl] // 2-byte Folded Spill
- ; CHECK-NEXT: tbz x19, #0, .LBB6_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB6_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB6_4:
Expand Down Expand Up @@ -360,7 +360,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbnz x19, #0, .LBB7_2
+ ; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB7_2:
Expand All @@ -381,12 +381,12 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbnz x19, #0, .LBB8_3
+ ; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
; CHECK-NEXT: bl streaming_callee
- ; CHECK-NEXT: tbnz x19, #0, .LBB8_5
+ ; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB8_5: // %if.then
Expand Down Expand Up @@ -417,12 +417,12 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: and x19, x0, #0x1
- ; CHECK-NEXT: tbz x19, #0, .LBB9_2
+ ; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB9_2:
; CHECK-NEXT: bl normal_callee
- ; CHECK-NEXT: tbz x19, #0, .LBB9_4
+ ; CHECK-NEXT: tbz w19, #0, .LBB9_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB9_4:
Expand Down