Conversation

labrinea (Collaborator) commented Dec 20, 2023

The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.
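
For readers unfamiliar with the VFABI mangling used in these names: in `_ZGVnN2vl8_modf`, `v` marks a vector parameter and `l8`/`l4` mark a pointer parameter that is linear with a constant stride of 8 or 4 bytes (one `double`/`float` per lane). A minimal sketch, following the shape of the test checks in this patch, of what the loop vectorizer can now emit for `modf` at VF=2:

```llvm
; Scalar call in the loop body:
;   %data = call double @modf(double %num, ptr %gepb)
; With the new SLEEF mapping, loop-vectorize widens this into a call to
; the vector variant; the linear pointer argument becomes a vector of
; per-lane pointers:
%vec = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> %wide.load, <2 x ptr> %ptrs)
```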

llvmbot added the vectorizers, llvm:analysis, and llvm:transforms labels on Dec 20, 2023
llvmbot (Member) commented Dec 20, 2023

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Alexandros Lamprineas (labrinea)

Changes

…ments.

The mappings correspond to vectorized variants (fixed/scalable) for the math functions: modf, sincos, sincospi.


Patch is 37.15 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76060.diff

4 Files Affected:

  • (modified) llvm/include/llvm/Analysis/VecFuncs.def (+36)
  • (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+2-1)
  • (added) llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll (+275)
  • (modified) llvm/test/Transforms/Util/add-TLI-mappings.ll (+122-2)
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 4bffcdee6f9caa..ee9207bb4f7dc1 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -506,12 +506,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f64", "_ZGVnN2v_log2", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "log10", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "llvm.log10.f64", "_ZGVnN2v_log10", FIXED(2), "_ZGV_LLVM_N2v")
 
+TLI_DEFINE_VECFUNC( "modf", "_ZGVnN2vl8_modf", FIXED(2), "_ZGV_LLVM_N2vl8")
+
 TLI_DEFINE_VECFUNC( "pow", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
 TLI_DEFINE_VECFUNC( "llvm.pow.f64", "_ZGVnN2vv_pow", FIXED(2), "_ZGV_LLVM_N2vv")
 
 TLI_DEFINE_VECFUNC( "sin", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC( "llvm.sin.f64", "_ZGVnN2v_sin", FIXED(2), "_ZGV_LLVM_N2v")
 
+TLI_DEFINE_VECFUNC( "sincos", "_ZGVnN2vl8l8_sincos", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
+TLI_DEFINE_VECFUNC( "sincospi", "_ZGVnN2vl8l8_sincospi", FIXED(2), "_ZGV_LLVM_N2vl8l8")
+
 TLI_DEFINE_VECFUNC( "sinh", "_ZGVnN2v_sinh", FIXED(2), "_ZGV_LLVM_N2v")
 
 TLI_DEFINE_VECFUNC( "sqrt", "_ZGVnN2v_sqrt", FIXED(2), "_ZGV_LLVM_N2v")
@@ -560,12 +566,18 @@ TLI_DEFINE_VECFUNC( "llvm.log2.f32", "_ZGVnN4v_log2f", FIXED(4), "_ZGV_LLVM_N4v"
 TLI_DEFINE_VECFUNC( "log10f", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC( "llvm.log10.f32", "_ZGVnN4v_log10f", FIXED(4), "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC( "modff", "_ZGVnN4vl4_modff", FIXED(4), "_ZGV_LLVM_N4vl4")
+
 TLI_DEFINE_VECFUNC( "powf", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
 TLI_DEFINE_VECFUNC( "llvm.pow.f32", "_ZGVnN4vv_powf", FIXED(4), "_ZGV_LLVM_N4vv")
 
 TLI_DEFINE_VECFUNC( "sinf", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC( "llvm.sin.f32", "_ZGVnN4v_sinf", FIXED(4), "_ZGV_LLVM_N4v")
 
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVnN4vl4l4_sincosf", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVnN4vl4l4_sincospif", FIXED(4), "_ZGV_LLVM_N4vl4l4")
+
 TLI_DEFINE_VECFUNC( "sinhf", "_ZGVnN4v_sinhf", FIXED(4), "_ZGV_LLVM_N4v")
 
 TLI_DEFINE_VECFUNC( "sqrtf", "_ZGVnN4v_sqrtf", FIXED(4), "_ZGV_LLVM_N4v")
@@ -637,6 +649,9 @@ TLI_DEFINE_VECFUNC("log10f", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f64", "_ZGVsMxv_log10", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "_ZGVsMxv_log10f", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("modf", "_ZGVsMxvl8_modf", SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "_ZGVsMxvl4_modff", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
 TLI_DEFINE_VECFUNC("pow", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("powf", "_ZGVsMxvv_powf", SCALABLE(4), MASKED, "_ZGVsMxvv")
 TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGVsMxvv_pow", SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -647,6 +662,12 @@ TLI_DEFINE_VECFUNC("sinf", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGVsMxv_sin", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f32", "_ZGVsMxv_sinf", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("sincos", "_ZGVsMxvl8l8_sincos", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "_ZGVsMxvl4l4_sincosf", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "_ZGVsMxvl8l8_sincospi", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "_ZGVsMxvl4l4_sincospif", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
 TLI_DEFINE_VECFUNC("sinh", "_ZGVsMxv_sinh",  SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("sinhf", "_ZGVsMxv_sinhf", SCALABLE(4), MASKED, "_ZGVsMxv")
 
@@ -834,6 +855,11 @@ TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_vlog10q_f32", FIXED(4), NOMASK, "_ZG
 TLI_DEFINE_VECFUNC("llvm.log10.f64", "armpl_svlog10_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.log10.f32", "armpl_svlog10_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("modf", "armpl_vmodfq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_vmodfq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4")
+TLI_DEFINE_VECFUNC("modf", "armpl_svmodf_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvl8")
+TLI_DEFINE_VECFUNC("modff", "armpl_svmodf_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4")
+
 TLI_DEFINE_VECFUNC("nextafter", "armpl_vnextafterq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vv")
 TLI_DEFINE_VECFUNC("nextafterf", "armpl_vnextafterq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vv")
 TLI_DEFINE_VECFUNC("nextafter", "armpl_svnextafter_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvv")
@@ -859,6 +885,16 @@ TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_vsinq_f32", FIXED(4), NOMASK, "_ZGV_LL
 TLI_DEFINE_VECFUNC("llvm.sin.f64", "armpl_svsin_f64_x", SCALABLE(2), MASKED, "_ZGVsMxv")
 TLI_DEFINE_VECFUNC("llvm.sin.f32", "armpl_svsin_f32_x", SCALABLE(4), MASKED, "_ZGVsMxv")
 
+TLI_DEFINE_VECFUNC("sincos", "armpl_vsincosq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_vsincosq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincos", "armpl_svsincos_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincosf", "armpl_svsincos_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
+TLI_DEFINE_VECFUNC("sincospi", "armpl_vsincospiq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2vl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_vsincospiq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4vl4l4")
+TLI_DEFINE_VECFUNC("sincospi", "armpl_svsincospi_f64_x", SCALABLE(2), MASKED, "_ZGVsMxvl8l8")
+TLI_DEFINE_VECFUNC("sincospif", "armpl_svsincospi_f32_x", SCALABLE(4), MASKED, "_ZGVsMxvl4l4")
+
 TLI_DEFINE_VECFUNC("sinh", "armpl_vsinhq_f64", FIXED(2), NOMASK, "_ZGV_LLVM_N2v")
 TLI_DEFINE_VECFUNC("sinhf", "armpl_vsinhq_f32", FIXED(4), NOMASK, "_ZGV_LLVM_N4v")
 TLI_DEFINE_VECFUNC("sinh", "armpl_svsinh_f64_x",  SCALABLE(2), MASKED, "_ZGVsMxv")
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 02e400d590bed4..1ca9556adad6ac 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -504,7 +504,8 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
   for (unsigned Part = 0; Part < State.UF; ++Part) {
     SmallVector<Type *, 2> TysForDecl;
     // Add return type if intrinsic is overloaded on it.
-    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1)) {
+    if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1) &&
+        VectorIntrinsicID != Intrinsic::not_intrinsic) {
       TysForDecl.push_back(
           VectorType::get(CI.getType()->getScalarType(), State.VF));
     }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
new file mode 100644
index 00000000000000..ba32236f275023
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/vector-libcall-linear-args.ll
@@ -0,0 +1,275 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter "call.*(modf|modff|sincos|sincosf|sincospi|sincospif)" --version 4
+
+; RUN: opt < %s -mattr=+neon -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=SLEEF-SVE
+; RUN: opt < %s -mattr=+neon -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-NEON
+; RUN: opt < %s -mattr=+sve -vector-library=ArmPL -passes=inject-tli-mappings,loop-vectorize -S | FileCheck %s --check-prefix=ARMPL-SVE
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_modf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-NEON:    [[TMP8:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON:    [[TMP9:%.*]] = call <2 x double> @_ZGVnN2vl8_modf(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; SLEEF-SVE:    [[TMP26:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    [[TMP27:%.*]] = call <vscale x 2 x double> @_ZGVsMxvl8_modf(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-NEON:    [[TMP8:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON:    [[TMP9:%.*]] = call <2 x double> @armpl_vmodfq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR1:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modf(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; ARMPL-SVE:    [[TMP26:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    [[TMP27:%.*]] = call <vscale x 2 x double> @armpl_svmodf_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    [[DATA:%.*]] = call double @modf(double [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %data = call double @modf(double %num, ptr %gepb)
+  %gepc = getelementptr inbounds double, ptr %c, i64 %indvars.iv
+  store double %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_modff(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_modff(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    [[TMP8:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; SLEEF-NEON:    [[TMP9:%.*]] = call <4 x float> @_ZGVnN4vl4_modff(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; SLEEF-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_modff(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    [[TMP26:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    [[TMP27:%.*]] = call <vscale x 4 x float> @_ZGVsMxvl4_modff(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_modff(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    [[TMP8:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]])
+; ARMPL-NEON:    [[TMP9:%.*]] = call <4 x float> @armpl_vmodfq_f32(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]])
+; ARMPL-NEON:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR2:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_modff(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    [[TMP26:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE:    [[TMP27:%.*]] = call <vscale x 4 x float> @armpl_svmodf_f32_x(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; ARMPL-SVE:    [[DATA:%.*]] = call float @modff(float [[NUM:%.*]], ptr [[GEPB:%.*]]) #[[ATTR3:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr float, ptr %a, i64 %indvars.iv
+  %num = load float, ptr %gepa, align 8
+  %gepb = getelementptr float, ptr %b, i64 %indvars.iv
+  %data = call float @modff(float %num, ptr %gepb)
+  %gepc = getelementptr inbounds float, ptr %c, i64 %indvars.iv
+  store float %data, ptr %gepc, align 8
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_sincos(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincos(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON:    call void @_ZGVnN2vl8l8_sincos(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincos(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @_ZGVsMxvl8l8_sincos(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincos(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD:%.*]], <2 x ptr> [[TMP6:%.*]], <2 x ptr> [[TMP8:%.*]])
+; ARMPL-NEON:    call void @armpl_vsincosq_f64(<2 x double> [[WIDE_LOAD2:%.*]], <2 x ptr> [[TMP7:%.*]], <2 x ptr> [[TMP9:%.*]])
+; ARMPL-NEON:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR3:[0-9]+]]
+;
+; ARMPL-SVE-LABEL: define void @test_sincos(
+; ARMPL-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD:%.*]], <vscale x 2 x ptr> [[TMP24:%.*]], <vscale x 2 x ptr> [[TMP26:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    call void @armpl_svsincos_f64_x(<vscale x 2 x double> [[WIDE_LOAD2:%.*]], <vscale x 2 x ptr> [[TMP25:%.*]], <vscale x 2 x ptr> [[TMP27:%.*]], <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; ARMPL-SVE:    call void @sincos(double [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %gepa = getelementptr double, ptr %a, i64 %indvars.iv
+  %num = load double, ptr %gepa, align 8
+  %gepb = getelementptr double, ptr %b, i64 %indvars.iv
+  %gepc = getelementptr double, ptr %c, i64 %indvars.iv
+  call void @sincos(double %num, ptr %gepb, ptr %gepc)
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+define void @test_sincosf(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) {
+; SLEEF-NEON-LABEL: define void @test_sincosf(
+; SLEEF-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[TMP8:%.*]])
+; SLEEF-NEON:    call void @_ZGVnN4vl4l4_sincosf(<4 x float> [[WIDE_LOAD2:%.*]], <4 x ptr> [[TMP7:%.*]], <4 x ptr> [[TMP9:%.*]])
+; SLEEF-NEON:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR4:[0-9]+]]
+;
+; SLEEF-SVE-LABEL: define void @test_sincosf(
+; SLEEF-SVE-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD:%.*]], <vscale x 4 x ptr> [[TMP24:%.*]], <vscale x 4 x ptr> [[TMP26:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @_ZGVsMxvl4l4_sincosf(<vscale x 4 x float> [[WIDE_LOAD2:%.*]], <vscale x 4 x ptr> [[TMP25:%.*]], <vscale x 4 x ptr> [[TMP27:%.*]], <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; SLEEF-SVE:    call void @sincosf(float [[NUM:%.*]], ptr [[GEPB:%.*]], ptr [[GEPC:%.*]]) #[[ATTR5:[0-9]+]]
+;
+; ARMPL-NEON-LABEL: define void @test_sincosf(
+; ARMPL-NEON-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; ARMPL-NEON:    call void @armpl_vsincosq_f32(<4 x float> [[WIDE_LOAD:%.*]], <4 x ptr> [[TMP6:%.*]], <4 x ptr> [[T...
[truncated]

mgabka (Contributor) commented Dec 20, 2023

Could you also add tests for the replace-with-veclib pass, which uses those mappings, to:
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

labrinea added a commit to labrinea/llvm-project that referenced this pull request Dec 22, 2023
This patch prepares the ground for llvm#76060.

* Unifies ArmPL and SLEEF tests for better coverage
* Replaces deprecated float* and double* types with ptr
* Adds noalias attribute to pointer arguments
* Adds some cmd-line options to the RUN lines to simplify output
* Removes datalayout since target triple is provided
* Removes checks for return statements
* Refactors the regex filter for autogenerated checks
* Removes redundant test file suffix (already under the AArch64 dir)
labrinea added a commit that referenced this pull request Dec 22, 2023
This patch prepares the ground for #76060.

* Unifies ArmPL and SLEEF tests for better coverage
* Replaces deprecated float* and double* types with ptr
* Adds noalias attribute to pointer arguments
* Adds some cmd-line options to the RUN lines to simplify output
* Removes datalayout since target triple is provided
* Removes checks for return statements
* Refactors the regex filter for autogenerated checks
* Removes redundant test file suffix (already under the AArch64 dir)
labrinea (Collaborator, Author) commented Dec 27, 2023

> Could you also add tests for the replace-with-veclib pass, which uses those mappings, to:
> llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
> llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef.ll
> llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll

I am not seeing corresponding LLVM intrinsics for the veclib functions I've added, so I don't think anything needs to be done in those test files.
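
To illustrate the distinction (a sketch, not part of this patch): replace-with-veclib rewrites calls to vector LLVM intrinsics using these same TLI mappings, for example:

```llvm
; Before (vector intrinsic call):
;   %r = call <2 x double> @llvm.sin.v2f64(<2 x double> %x)
; After replace-with-veclib with -vector-library=sleefgnuabi:
%r = call <2 x double> @_ZGVnN2v_sin(<2 x double> %x)
```

Since no such intrinsics exist for modf, sincos, and sincospi, the new mappings are only reachable through the loop vectorizer.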

Comment on lines 44 to 45

Contributor

this isn't needed

Collaborator Author

Without a store we are not using the return value of the call. I see no harm here.

Contributor

Sure, I just pointed it out as it could simplify the test.

labrinea added a commit to labrinea/llvm-project that referenced this pull request Jan 3, 2024
When creating a declaration for a vector variant, in order to determine
the argument types we need to consult the VFABI demangler. This will
allow us to add TLI mappings with linear arguments (see llvm#76060).
labrinea added a commit that referenced this pull request Jan 3, 2024
When creating a declaration for a vector variant, in order to determine
the argument types we need to consult the VFABI demangler. This will
allow us to add TLI mappings with linear arguments (see #76060).
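
As an illustration of why the demangler is needed (a sketch derived from this patch's test checks): for `_ZGVnN2vl8_modf` the tokens `v` and `l8` describe the parameter shapes, from which the compiler can build a correctly typed declaration:

```llvm
; 'v'  -> vector parameter           -> <2 x double>
; 'l8' -> linear pointer with step 8 -> widened to <2 x ptr> at call sites
declare <2 x double> @_ZGVnN2vl8_modf(<2 x double>, <2 x ptr>)
```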
labrinea force-pushed the tli-mappings-with-linear-args branch from d77ca23 to db47426 on January 3, 2024 at 16:57
labrinea changed the title from "[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear argu…" to "[TLI] Add mappings to SLEEF/ArmPL libcall variants taking linear args." on Jan 3, 2024
labrinea merged commit 8c7f10e into llvm:main on Jan 5, 2024
labrinea deleted the tli-mappings-with-linear-args branch on January 5, 2024 at 11:10