-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear args. #76060
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: Alexandros Lamprineas (labrinea) Changes…ments. The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi. Patch is 37.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76060.diff 4 Files Affected:
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 4bffcdee6f9caa..ee9207bb4f7dc1 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -506,12 +506,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
+
TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
+TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
+TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
@@ -560,12 +566,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v"
TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
+
TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -637,6 +649,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv")
TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -647,6 +662,12 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
@@ -834,6 +855,11 @@ TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZG
TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
+TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -859,6 +885,16 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK, "_ZGV_LL
TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
+TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02e400d590bed4..1ca9556adad6ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -504,7 +504,8 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
for (unsigned Part = 0; Part < State.UF; ++Part) {
SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
- if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+ if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1) &&
+ VectorIntrinsicID != Intrinsic::not_intrinsic) {
TysForDecl.push_back(
VectorType::get(CI.getType()->getScalarType(), State.VF));
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
new file mode 100644
index 00000000000000..ba32236f275023
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(modf|modff|sincos|sincosf|sincospi|sincospif)" --version 4
+
+; RUN: opt < %s -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-SVE
+; RUN: opt < %s -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-NEON: [[TMP8:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON: [[TMP9:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-SVE: [[TMP26:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE: [[TMP27:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-NEON: [[TMP8:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON: [[TMP9:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modf(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-SVE: [[TMP26:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE: [[TMP27:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE: [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+ %num = load double, ptr %gepa, align 8
+ %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+ %data = call double @modf(double %num, ptr %gepb)
+ %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
+ store double %data, ptr %gepc, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modff(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: [[TMP8:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON: [[TMP9:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modff(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: [[TMP26:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE: [[TMP27:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modff(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: [[TMP8:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON: [[TMP9:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modff(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: [[TMP26:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE: [[TMP27:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE: [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+ %num = load float, ptr %gepa, align 8
+ %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+ %data = call float @modff(float %num, ptr %gepb)
+ %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+ store float %data, ptr %gepc, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincos(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON: call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincos(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE: call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincos(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; ARMPL-NEON: call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; ARMPL-NEON: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_sincos(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE: call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE: call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE: call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+entry:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+ %num = load double, ptr %gepa, align 8
+ %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+ %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+ call void @sincos(double %num, ptr %gepb, ptr %gepc)
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincosf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON: call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]], <4 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincosf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x ptr> [[TMP26:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE: call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x ptr> [[TMP27:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE: call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincosf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON: call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[T...
[truncated]
|
|
could you also add tests for the replace-with-veclib-pass which is using those mapping to: |
llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
Outdated
Show resolved
Hide resolved
This patch prepares the ground for llvm#76060. * Unifies ArmPL and SLEEF tests for better coverage * Replaces deprecated float* and double* types with ptr * Adds noalias attribute to pointer arguments * Adds some cmd-line options to the RUN lines to simplify output * Removes datalayout since target triple is provided * Removes checks for return statements * Refactors the regex filter for autogenerated checks * Removes redundant test file suffix (already under the AArch64 dir)
This patch prepares the ground for #76060. * Unifies ArmPL and SLEEF tests for better coverage * Replaces deprecated float* and double* types with ptr * Adds noalias attribute to pointer arguments * Adds some cmd-line options to the RUN lines to simplify output * Removes datalayout since target triple is provided * Removes checks for return statements * Refactors the regex filter for autogenerated checks * Removes redundant test file suffix (already under the AArch64 dir)
I am not seeing corresponding llvm instrinsics for the veclib functions I've added. I don't think there's anything needed to be done in these test files. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this isn't needed
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Without a store we are not using the return value of the call. I see no harm here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
sure, I just pointed it out as it could simplify the test.
llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
Outdated
Show resolved
Hide resolved
When creating a declaration for a vector variant, in order to determine the argument types we need to consult the VFABI demangler. This will allow us to add TLI mappings with linear arguments (see llvm#76060).
When creating a declaration for a vector variant, in order to determine the argument types we need to consult the VFABI demangler. This will allow us to add TLI mappings with linear arguments (see #76060).
The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.
d77ca23 to
db47426
Compare
The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.