-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TLI][AArch64] Add TLI Mappings of @llvm.exp10 for ArmPL and SLEEF. #72990
[TLI][AArch64] Add TLI Mappings of @llvm.exp10 for ArmPL and SLEEF. #72990
Conversation
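@llvm/pr-subscribers-backend-aarch64
@llvm/pr-subscribers-llvm-transforms
Author: Paschalis Mpeis (paschalis-mpeis)
Changes
Patch is 27.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/72990.diff
6 Files Affected:
- llvm/include/llvm/Analysis/VecFuncs.def
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
- llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
- llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll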
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index b2e989e4013ea00..c628e72b24d12b0 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -492,6 +492,7 @@ TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
@@ -544,6 +545,7 @@ TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -609,6 +611,8 @@ TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -753,6 +757,11 @@ TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index 127514961f48dfd..a38d4a53407c5d2 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -192,6 +192,50 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
ret <vscale x 4 x float> %1
}
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
+
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+ ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+ ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+;
+ %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+ ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+;
+ %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+ ret <vscale x 4 x float> %1
+}
declare <2 x double> @llvm.log.v2f64(<2 x double>)
declare <4 x float> @llvm.log.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
index f05af5268e957e1..8b06c41bcb1a6d1 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -95,6 +95,24 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
ret <vscale x 4 x float> %1
}
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+;
+ %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+ ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+;
+ %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+ ret <vscale x 4 x float> %1
+}
+
define <vscale x 2 x double> @llvm_fabs_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_fabs_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
@@ -375,6 +393,8 @@ declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
index 5dd87a4bb29550c..cedb7dd85149d00 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
@@ -96,6 +96,24 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
ret <4 x float> %1
}
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+ ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+ ret <4 x float> %1
+}
+
define <2 x double> @llvm_fabs_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_fabs_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[IN:%.*]])
@@ -376,6 +394,8 @@ declare <2 x double> @llvm.exp.v2f64(<2 x double>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
index 0a27c74782ccba1..03d959c928577d5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
@@ -161,6 +161,57 @@ define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
ret void
}
+declare double @llvm.exp10.f64(double)
+declare float @llvm.exp10.f32(float)
+
+define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
+; CHECK-LABEL: @exp10_f64(
+; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
+; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
+; CHECK: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+ %in = load double, ptr %in.gep, align 8
+ %call = tail call double @llvm.exp10.f64(double %in)
+ %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+ store double %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
+; CHECK-LABEL: @exp10_f32(
+; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
+; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
+; CHECK: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+ %in = load float, ptr %in.gep, align 8
+ %call = tail call float @llvm.exp10.f32(float %in)
+ %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+ store float %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @llvm.log.f64(double)
declare float @llvm.log.f32(float)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
index 715c2c352b7762b..14b7fd28b38e197 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
@@ -334,6 +334,71 @@ define void @llvm_exp2_f32(float* nocapture %varray) {
ret void
}
+declare double @llvm.exp10.f64(double)
+declare float @llvm.exp10.f32(float)
+
+define void @llvm_exp10_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp10_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP1:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @llvm_exp10_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.exp10.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @llvm_exp10_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp10_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP1:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @llvm_exp10_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.exp10.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @llvm.fabs.f64(double)
declare float @llvm.fabs.f32(float)
@@ -537,13 +602,13 @@ define void @llvm_log_f64(double* nocapture %varray) {
; NEON-LABEL: define void @llvm_log_f64
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f64
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
; SVE: ret void
;
entry:
@@ -568,13 +633,13 @@ define void @llvm_log_f32(float* nocapture %varray) {
; NEON-LABEL: define void @llvm_log_f32
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f32
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
; SVE: ret void
;
entry:
@@ -602,13 +667,13 @@ define void @llvm_log10_f64(double* nocapture %varray) {
; NEON-LABEL: define void @llvm_log10_f64
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f64
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
; SVE: ret void
;
entry:
@@ -633,13 +698,13 @@ define void @llvm_log10_f32(float* nocapture %varray) {
; NEON-LABEL: define void @llvm_log10_f32
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2...
[truncated]
|
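@llvm/pr-subscribers-llvm-analysis
Author: Paschalis Mpeis (paschalis-mpeis)
Changes
Patch is 27.38 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/72990.diff
6 Files Affected:
- llvm/include/llvm/Analysis/VecFuncs.def
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
- llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
- llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
- llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll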
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index b2e989e4013ea00..c628e72b24d12b0 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -492,6 +492,7 @@ TLI_DEFINE_VECFUNC( "exp2", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "llvm.exp2.f64", "_ZGVnN2v_exp2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "exp10", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC( "llvm.exp10.f64", "_ZGVnN2v_exp10", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "lgamma", "_ZGVnN2v_lgamma", FIXED(2), "_ZGV_LLVM_N2v")
@@ -544,6 +545,7 @@ TLI_DEFINE_VECFUNC( "exp2f", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "llvm.exp2.f32", "_ZGVnN4v_exp2f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "exp10f", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC( "llvm.exp10.f32", "_ZGVnN4v_exp10f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "lgammaf", "_ZGVnN4v_lgammaf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -609,6 +611,8 @@ TLI_DEFINE_VECFUNC("llvm.exp2.f32", "_ZGVsMxv_exp2f", SCALABLE(4), MASKED, "_ZGV
TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "_ZGVsMxv_exp10", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED, "_ZGVsMxvv")
@@ -753,6 +757,11 @@ TLI_DEFINE_VECFUNC("exp10f", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N
TLI_DEFINE_VECFUNC("exp10", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("exp10f", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_vexp10q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_vexp10q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("llvm.exp10.f64", "armpl_svexp10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("llvm.exp10.f32", "armpl_svexp10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+
TLI_DEFINE_VECFUNC("expm1", "armpl_vexpm1q_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("expm1f", "armpl_vexpm1q_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("expm1", "armpl_svexpm1_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index 127514961f48dfd..a38d4a53407c5d2 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
@@ -192,6 +192,50 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) #0 {
ret <vscale x 4 x float> %1
}
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
+
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: define <2 x double> @llvm_exp10_f64
+; CHECK-SAME: (<2 x double> [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @armpl_vexp10q_f64(<2 x double> [[IN]])
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+ ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: define <4 x float> @llvm_exp10_f32
+; CHECK-SAME: (<4 x float> [[IN:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @armpl_vexp10q_f32(<4 x float> [[IN]])
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+ ret <4 x float> %1
+}
+
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @llvm_exp10_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+;
+ %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+ ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @llvm_exp10_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+;
+ %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+ ret <vscale x 4 x float> %1
+}
declare <2 x double> @llvm.log.v2f64(<2 x double>)
declare <4 x float> @llvm.log.v4f32(<4 x float>)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
index f05af5268e957e1..8b06c41bcb1a6d1 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -95,6 +95,24 @@ define <vscale x 4 x float> @llvm_exp2_vscale_f32(<vscale x 4 x float> %in) {
ret <vscale x 4 x float> %1
}
+define <vscale x 2 x double> @llvm_exp10_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
+; CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+;
+ %1 = call fast <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double> %in)
+ ret <vscale x 2 x double> %1
+}
+
+define <vscale x 4 x float> @llvm_exp10_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_vscale_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> [[IN:%.*]])
+; CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+;
+ %1 = call fast <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float> %in)
+ ret <vscale x 4 x float> %1
+}
+
define <vscale x 2 x double> @llvm_fabs_vscale_f64(<vscale x 2 x double> %in) {
; CHECK-LABEL: @llvm_fabs_vscale_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double> [[IN:%.*]])
@@ -375,6 +393,8 @@ declare <vscale x 2 x double> @llvm.exp.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.exp.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.exp2.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.exp2.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.exp10.nxv2f64(<vscale x 2 x double>)
+declare <vscale x 4 x float> @llvm.exp10.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.fabs.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.fabs.nxv4f32(<vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.floor.nxv2f64(<vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
index 5dd87a4bb29550c..cedb7dd85149d00 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [14 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
+; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
@@ -96,6 +96,24 @@ define <4 x float> @llvm_exp2_f32(<4 x float> %in) {
ret <4 x float> %1
}
+define <2 x double> @llvm_exp10_f64(<2 x double> %in) {
+; CHECK-LABEL: @llvm_exp10_f64(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @_ZGVnN2v_exp10(<2 x double> [[IN:%.*]])
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = call fast <2 x double> @llvm.exp10.v2f64(<2 x double> %in)
+ ret <2 x double> %1
+}
+
+define <4 x float> @llvm_exp10_f32(<4 x float> %in) {
+; CHECK-LABEL: @llvm_exp10_f32(
+; CHECK-NEXT: [[TMP1:%.*]] = call fast <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[IN:%.*]])
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = call fast <4 x float> @llvm.exp10.v4f32(<4 x float> %in)
+ ret <4 x float> %1
+}
+
define <2 x double> @llvm_fabs_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_fabs_f64(
; CHECK-NEXT: [[TMP1:%.*]] = call fast <2 x double> @llvm.fabs.v2f64(<2 x double> [[IN:%.*]])
@@ -376,6 +394,8 @@ declare <2 x double> @llvm.exp.v2f64(<2 x double>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)
declare <2 x double> @llvm.exp2.v2f64(<2 x double>)
declare <4 x float> @llvm.exp2.v4f32(<4 x float>)
+declare <2 x double> @llvm.exp10.v2f64(<2 x double>)
+declare <4 x float> @llvm.exp10.v4f32(<4 x float>)
declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
declare <2 x double> @llvm.floor.v2f64(<2 x double>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
index 0a27c74782ccba1..03d959c928577d5 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
@@ -161,6 +161,57 @@ define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
ret void
}
+declare double @llvm.exp10.f64(double)
+declare float @llvm.exp10.f32(float)
+
+define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
+; CHECK-LABEL: @exp10_f64(
+; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
+; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
+; CHECK: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+ %in = load double, ptr %in.gep, align 8
+ %call = tail call double @llvm.exp10.f64(double %in)
+ %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+ store double %call, ptr %out.gep, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
+; CHECK-LABEL: @exp10_f32(
+; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
+; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
+; CHECK: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+ %in = load float, ptr %in.gep, align 8
+ %call = tail call float @llvm.exp10.f32(float %in)
+ %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+ store float %call, ptr %out.gep, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @llvm.log.f64(double)
declare float @llvm.log.f32(float)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
index 715c2c352b7762b..14b7fd28b38e197 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
@@ -334,6 +334,71 @@ define void @llvm_exp2_f32(float* nocapture %varray) {
ret void
}
+declare double @llvm.exp10.f64(double)
+declare float @llvm.exp10.f32(float)
+
+define void @llvm_exp10_f64(double* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp10_f64
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_exp10(<2 x double> [[TMP1:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @llvm_exp10_f64
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.exp10.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+ store double %call, double* %arrayidx, align 8
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
+define void @llvm_exp10_f32(float* nocapture %varray) {
+; NEON-LABEL: define void @llvm_exp10_f32
+; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
+; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_exp10f(<4 x float> [[TMP1:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @llvm_exp10_f32
+; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE: ret void
+;
+ entry:
+ br label %for.body
+
+ for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.exp10.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+ store float %call, float* %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+ for.end:
+ ret void
+}
+
declare double @llvm.fabs.f64(double)
declare float @llvm.fabs.f32(float)
@@ -537,13 +602,13 @@ define void @llvm_log_f64(double* nocapture %varray) {
; NEON-LABEL: define void @llvm_log_f64
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log(<2 x double> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f64
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
; SVE: ret void
;
entry:
@@ -568,13 +633,13 @@ define void @llvm_log_f32(float* nocapture %varray) {
; NEON-LABEL: define void @llvm_log_f32
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <4 x float> @_ZGVnN4v_logf(<4 x float> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR10:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log_f32
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[TMP11:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[CONV:%.*]]) #[[ATTR13:[0-9]+]]
; SVE: ret void
;
entry:
@@ -602,13 +667,13 @@ define void @llvm_log10_f64(double* nocapture %varray) {
; NEON-LABEL: define void @llvm_log10_f64
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2:%.*]] = call <2 x double> @_ZGVnN2v_log10(<2 x double> [[TMP1:%.*]])
-; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR11:[0-9]+]]
; NEON: ret void
;
; SVE-LABEL: define void @llvm_log10_f64
; SVE-SAME: (ptr nocapture [[VARRAY:%.*]]) #[[ATTR1]] {
; SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[TMP11:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
-; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[CONV:%.*]]) #[[ATTR14:[0-9]+]]
; SVE: ret void
;
entry:
@@ -633,13 +698,13 @@ define void @llvm_log10_f32(float* nocapture %varray) {
; NEON-LABEL: define void @llvm_log10_f32
; NEON-SAME: (ptr nocapture [[VARRAY:%.*]]) {
; NEON: [[TMP2...
[truncated]
|
llvm/test/Transforms/LoopVectorize/AArch64/sleef-intrinsic-calls-aarch64.ll
Show resolved
Hide resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Update regex to _explicitly_ show which exp versions are added. The previous regex used `exp[^e]` to avoid matching calls like `@llvm.experimental.stepvector`. Note: ArmPL mappings for scalable types (e.g., `llvm.exp10.nxv2f64`, `llvm.exp10.nxv4f32`) are not yet utilized, as the `replace-with-veclib` pass needs improvements.
f51b752
to
de6c9c8
Compare
The first commit adds tests to show that the mappings are missing.
The second commit adds the mappings and updates those tests.