-
Notifications
You must be signed in to change notification settings - Fork 13.8k
[AArch64] Avoid single-element vector fp converts in streaming[-compatible] functions #112213
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py | ||
; RUN: llc -force-streaming-compatible < %s | FileCheck %s | ||
; RUN: llc < %s | FileCheck %s --check-prefix=NON-STREAMING | ||
|
||
target triple = "aarch64-unknown-linux-gnu" | ||
|
||
; t1: round-trips a double through a signed 64-bit integer | ||
; (fptosi double -> i64, then sitofp i64 -> double). | ||
; The CHECK lines (the -force-streaming-compatible RUN) expect the integer | ||
; to pass through a general-purpose register (x8); the NON-STREAMING lines | ||
; (the plain llc RUN) expect both converts to stay in d0. | ||
define double @t1(double %x) { | ||
; CHECK-LABEL: t1: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvtzs x8, d0 | ||
; CHECK-NEXT: scvtf d0, x8 | ||
Comment on lines
+10
to
+11
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This operation may be expensive when executed in streaming mode, because the operations move results between register files (FPR <-> GPR). Could you emulate this operation with SVE instructions instead? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll look into that in a follow up patch 👍 I need to check how to do something ~equivalent for SVE since for Neon this is done via a load of .td patterns (matching both the to/from int parts). |
||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t1: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvtzs d0, d0 | ||
; NON-STREAMING-NEXT: scvtf d0, d0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptosi double %x to i64 | ||
%conv1 = sitofp i64 %conv to double | ||
ret double %conv1 | ||
} | ||
|
||
; t2: round-trips a float through a signed 32-bit integer | ||
; (fptosi float -> i32, then sitofp i32 -> float). | ||
; The CHECK lines expect the integer to pass through w8; the NON-STREAMING | ||
; lines expect both converts to operate on s0 directly. | ||
define float @t2(float %x) { | ||
; CHECK-LABEL: t2: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvtzs w8, s0 | ||
; CHECK-NEXT: scvtf s0, w8 | ||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t2: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvtzs s0, s0 | ||
; NON-STREAMING-NEXT: scvtf s0, s0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptosi float %x to i32 | ||
%conv1 = sitofp i32 %conv to float | ||
ret float %conv1 | ||
} | ||
|
||
; t3: round-trips a half through a signed 32-bit integer | ||
; (fptosi half -> i32, then sitofp i32 -> half). | ||
; Both expected sequences first widen h0 to s0 with fcvt and narrow back to | ||
; h0 at the end; they differ only in whether the integer lives in w8 (CHECK) | ||
; or stays in s0 (NON-STREAMING). | ||
define half @t3(half %x) { | ||
; CHECK-LABEL: t3: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvt s0, h0 | ||
; CHECK-NEXT: fcvtzs w8, s0 | ||
; CHECK-NEXT: scvtf s0, w8 | ||
; CHECK-NEXT: fcvt h0, s0 | ||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t3: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvt s0, h0 | ||
; NON-STREAMING-NEXT: fcvtzs s0, s0 | ||
; NON-STREAMING-NEXT: scvtf s0, s0 | ||
; NON-STREAMING-NEXT: fcvt h0, s0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptosi half %x to i32 | ||
%conv1 = sitofp i32 %conv to half | ||
ret half %conv1 | ||
} | ||
|
||
; t4: unsigned counterpart of t1 - round-trips a double through an unsigned | ||
; 64-bit integer (fptoui double -> i64, then uitofp i64 -> double). | ||
; The CHECK lines expect fcvtzu/ucvtf via x8; the NON-STREAMING lines expect | ||
; the same pair operating on d0 directly. | ||
define double @t4(double %x) { | ||
; CHECK-LABEL: t4: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvtzu x8, d0 | ||
; CHECK-NEXT: ucvtf d0, x8 | ||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t4: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvtzu d0, d0 | ||
; NON-STREAMING-NEXT: ucvtf d0, d0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptoui double %x to i64 | ||
%conv1 = uitofp i64 %conv to double | ||
ret double %conv1 | ||
} | ||
|
||
; t5: unsigned counterpart of t2 - round-trips a float through an unsigned | ||
; 32-bit integer (fptoui float -> i32, then uitofp i32 -> float). | ||
; The CHECK lines expect fcvtzu/ucvtf via w8; the NON-STREAMING lines expect | ||
; the same pair operating on s0 directly. | ||
define float @t5(float %x) { | ||
; CHECK-LABEL: t5: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvtzu w8, s0 | ||
; CHECK-NEXT: ucvtf s0, w8 | ||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t5: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvtzu s0, s0 | ||
; NON-STREAMING-NEXT: ucvtf s0, s0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptoui float %x to i32 | ||
%conv1 = uitofp i32 %conv to float | ||
ret float %conv1 | ||
} | ||
|
||
; t6: unsigned counterpart of t3 - round-trips a half through an unsigned | ||
; 32-bit integer (fptoui half -> i32, then uitofp i32 -> half). | ||
; Both expected sequences widen h0 to s0 with fcvt before converting and | ||
; narrow back to h0 afterwards; the integer lives in w8 for CHECK and stays | ||
; in s0 for NON-STREAMING. | ||
define half @t6(half %x) { | ||
; CHECK-LABEL: t6: | ||
; CHECK: // %bb.0: // %entry | ||
; CHECK-NEXT: fcvt s0, h0 | ||
; CHECK-NEXT: fcvtzu w8, s0 | ||
; CHECK-NEXT: ucvtf s0, w8 | ||
; CHECK-NEXT: fcvt h0, s0 | ||
; CHECK-NEXT: ret | ||
; | ||
; NON-STREAMING-LABEL: t6: | ||
; NON-STREAMING: // %bb.0: // %entry | ||
; NON-STREAMING-NEXT: fcvt s0, h0 | ||
; NON-STREAMING-NEXT: fcvtzu s0, s0 | ||
; NON-STREAMING-NEXT: ucvtf s0, s0 | ||
; NON-STREAMING-NEXT: fcvt h0, s0 | ||
; NON-STREAMING-NEXT: ret | ||
entry: | ||
%conv = fptoui half %x to i32 | ||
%conv1 = uitofp i32 %conv to half | ||
ret half %conv1 | ||
} |
Uh oh!
There was an error while loading. Please reload this page.