-
Notifications
You must be signed in to change notification settings - Fork 12.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[NFC][TLI] Improve tests for ArmPL and SLEEF Intrinsics. #73352
[NFC][TLI] Improve tests for ArmPL and SLEEF Intrinsics. #73352
Conversation
Auto-generate test `armpl-intrinsics.ll`, and use active lane mask to have shorter `shufflevector` check lines. Update scripts now add `@llvm.compiler.used` instead of using the regex: `@[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]`
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-aarch64 Author: Paschalis Mpeis (paschalis-mpeis) ChangesAuto-generate test Update scripts now add Patch is 53.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73352.diff 4 Files Affected:
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
index a38d4a53407c5d2..18431ae021f9766 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -15,7 +15,7 @@ declare <vscale x 2 x double> @llvm.cos.nxv2f64(<vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.cos.nxv4f32(<vscale x 4 x float>)
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @armpl_vcosq_f64, ptr @armpl_vcosq_f32, ptr @armpl_vsinq_f64, ptr @armpl_vsinq_f32, ptr @armpl_vexpq_f64, ptr @armpl_vexpq_f32, ptr @armpl_vexp2q_f64, ptr @armpl_vexp2q_f32, ptr @armpl_vexp10q_f64, ptr @armpl_vexp10q_f32, ptr @armpl_vlogq_f64, ptr @armpl_vlogq_f32, ptr @armpl_vlog2q_f64, ptr @armpl_vlog2q_f32, ptr @armpl_vlog10q_f64, ptr @armpl_vlog10q_f32], section "llvm.metadata"
;.
define <2 x double> @llvm_cos_f64(<2 x double> %in) {
; CHECK-LABEL: define <2 x double> @llvm_cos_f64
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
index cedb7dd85149d00..be247de368056e7 100644
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
@@ -4,7 +4,7 @@
target triple = "aarch64-unknown-linux-gnu"
;.
-; CHECK: @[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]] = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
+; CHECK: @llvm.compiler.used = appending global [16 x ptr] [ptr @_ZGVnN2v_cos, ptr @_ZGVnN4v_cosf, ptr @_ZGVnN2v_exp, ptr @_ZGVnN4v_expf, ptr @_ZGVnN2v_exp2, ptr @_ZGVnN4v_exp2f, ptr @_ZGVnN2v_exp10, ptr @_ZGVnN4v_exp10f, ptr @_ZGVnN2v_log, ptr @_ZGVnN4v_logf, ptr @_ZGVnN2v_log10, ptr @_ZGVnN4v_log10f, ptr @_ZGVnN2v_log2, ptr @_ZGVnN4v_log2f, ptr @_ZGVnN2v_sin, ptr @_ZGVnN4v_sinf], section "llvm.metadata"
;.
define <2 x double> @llvm_ceil_f64(<2 x double> %in) {
; CHECK-LABEL: @llvm_ceil_f64(
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
index 03d959c928577d5..07b1402b4697fa2 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/armpl-intrinsics.ll
@@ -1,10 +1,9 @@
-; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON
-; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "(\.|_v|_sv)(ceil|copysign|cos|exp\.|expf?\(|exp2|exp10|fabs|floor|fma|log|m..num|pow|nearbyint|rint|round|sin|sqrt|trunc)|(ret)" --version 2
+; RUN: opt -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefixes=NEON
+; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -S < %s | FileCheck %s --check-prefixes=SVE
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"
-
; Tests are checking if LV can vectorize loops with llvm math intrinsics
; using mappings from TLI for scalable and fixed width vectorization.
@@ -12,10 +11,18 @@ declare double @llvm.cos.f64(double)
declare float @llvm.cos.f32(float)
define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @cos_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+;
+; NEON-LABEL: define void @cos_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vcosq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]]) #[[ATTR1:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @cos_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[IN:%.*]]) #[[ATTR5:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -36,10 +43,17 @@ define void @cos_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @cos_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @cos_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @cos_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vcosq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]]) #[[ATTR2:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @cos_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.cos.f32(float [[IN:%.*]]) #[[ATTR6:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -63,10 +77,15 @@ declare double @llvm.exp.f64(double)
declare float @llvm.exp.f32(float)
define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexpq_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]]) #[[ATTR3:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp.f64(double [[IN:%.*]]) #[[ATTR7:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -87,10 +106,15 @@ define void @exp_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @exp_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexpq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]]) #[[ATTR4:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp.f32(float [[IN:%.*]]) #[[ATTR8:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -114,10 +138,17 @@ declare double @llvm.exp2.f64(double)
declare float @llvm.exp2.f32(float)
define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp2_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp2_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vexp2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]]) #[[ATTR5:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp2_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp2.f64(double [[IN:%.*]]) #[[ATTR9:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -138,10 +169,17 @@ define void @exp2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @exp2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp2_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp2_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vexp2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]]) #[[ATTR6:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp2_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp2.f32(float [[IN:%.*]]) #[[ATTR10:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -165,10 +203,17 @@ declare double @llvm.exp10.f64(double)
declare float @llvm.exp10.f32(float)
define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp10_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp10_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vexp10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]]) #[[ATTR7:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp10_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.exp10.f64(double [[IN:%.*]]) #[[ATTR11:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -189,10 +234,17 @@ define void @exp10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @exp10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @exp10_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @exp10_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vexp10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]]) #[[ATTR8:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @exp10_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.exp10.f32(float [[IN:%.*]]) #[[ATTR12:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -216,10 +268,17 @@ declare double @llvm.log.f64(double)
declare float @llvm.log.f32(float)
define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vlogq_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]]) #[[ATTR9:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.log.f64(double [[IN:%.*]]) #[[ATTR13:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -240,10 +299,17 @@ define void @log_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @log_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vlogq_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]]) #[[ATTR10:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.log.f32(float [[IN:%.*]]) #[[ATTR14:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -267,10 +333,17 @@ declare double @llvm.log2.f64(double)
declare float @llvm.log2.f32(float)
define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log2_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log2_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vlog2q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]]) #[[ATTR11:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log2_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.log2.f64(double [[IN:%.*]]) #[[ATTR15:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -291,10 +364,17 @@ define void @log2_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @log2_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log2_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log2_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vlog2q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]]) #[[ATTR12:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log2_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.log2.f32(float [[IN:%.*]]) #[[ATTR16:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -318,10 +398,17 @@ declare double @llvm.log10.f64(double)
declare float @llvm.log10.f32(float)
define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log10_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log10_f64
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <2 x double> @armpl_vlog10q_f64(<2 x double> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]]) #[[ATTR13:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log10_f64
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call double @llvm.log10.f64(double [[IN:%.*]]) #[[ATTR17:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -342,10 +429,17 @@ define void @log10_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
}
define void @log10_f32(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @log10_f32(
-; NEON: [[TMP5:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[TMP4:%.*]])
-; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x i1> {{.*}})
-; CHECK: ret void
+; NEON-LABEL: define void @log10_f32
+; NEON-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) {
+; NEON: [[TMP4:%.*]] = call <4 x float> @armpl_vlog10q_f32(<4 x float> [[WIDE_LOAD:%.*]])
+; NEON: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]]) #[[ATTR14:[0-9]+]]
+; NEON: ret void
+;
+; SVE-LABEL: define void @log10_f32
+; SVE-SAME: (ptr nocapture [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
+; SVE: [[TMP17:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SVE: [[CALL:%.*]] = tail call float @llvm.log10.f32(float [[IN:%.*]]) #[[ATTR18:[0-9]+]]
+; SVE: ret void
;
entry:
br label %for.body
@@ -369,10 +463,17 @@ declare double @llvm.sin.f64(double)
declare float @llvm.sin.f32(float)
define void @sin_f64(ptr nocapture %in.ptr, ptr %out.ptr) {
-; CHECK-LABEL: @sin_f64(
-; NEON: [[TMP5:%.*]] = call <2 x double> @armpl_v...
[truncated]
|
`noalias` attribute was added only to the `%in.ptr` parameter of the ArmPL Intrinsics.
Auto-generate test
armpl-intrinsics.ll
, and use active lane mask to have shortershufflevector
check lines.Update scripts now add
@llvm.compiler.used
instead of using the regex:@[[LLVM_COMPILER_USED:[a-zA-Z0-9_$"\\.-]+]]