[LoongArch] SIMD intrinsics not fully inlined in caller with target feature globally enabled

I tried this code:

```rust
#![feature(stdarch_loongarch)]
use std::arch::loongarch64::*;

pub unsafe fn simd(s: i32) -> i32 {
    lsx_vpickve2gr_b::<0>(lsx_vreplgr2vr_b(s))
}
```

```sh
rustc --crate-type lib -C opt-level=3 --emit llvm-ir -o lsx.ll lsx.rs
```

I expected to see this happen:

The `lsx` intrinsics are fully inlined into the `simd` function when the `lsx` target feature is globally enabled.

```llvm
; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #0 {
start:
  %_2 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %s) #2
  %_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %_2, i32 noundef 0) #2
  ret i32 %_0
}

; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #1

; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #1

attributes #0 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }

```

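Instead, this happened: the intrinsic wrappers are emitted as separate functions that `simd` calls, with the vector passed through a stack slot. Note that the wrappers carry `"target-features"="+f,+d,+lsx,+lsx,+d,+f"` while `simd` carries `"target-features"="+f,+d,+lsx"`.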

```llvm
; core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: read) uwtable
define internal fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef readonly align 16 dereferenceable(16) %a) unnamed_addr #0 {
start:
  %0 = load <16 x i8>, ptr %a, align 16
  %_0 = tail call noundef i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> %0, i32 noundef 0) #4
  ret i32 %_0
}

; core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
; Function Attrs: inlinehint nofree nosync nounwind memory(argmem: write) uwtable
define internal fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr dead_on_unwind noalias nocapture noundef writable writeonly align 16 dereferenceable(16) %_0, i32 noundef signext %a) unnamed_addr #1 {
start:
  %0 = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 noundef %a) #4
  store <16 x i8> %0, ptr %_0, align 16
  ret void
}

; loong64::simd
; Function Attrs: nofree nosync nounwind memory(none) uwtable
define noundef i32 @_ZN7loong644simd17h54d99178ac0d0f82E(i32 noundef signext %s) unnamed_addr #2 {
start:
  %0 = alloca [16 x i8], align 16
; call core::core_arch::loongarch64::lsx::generated::lsx_vreplgr2vr_b
  call fastcc void @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vreplgr2vr_b17h0060558a0a7e8678E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0, i32 noundef signext %s)
; call core::core_arch::loongarch64::lsx::generated::lsx_vpickve2gr_b
  %_0 = call fastcc noundef i32 @_ZN4core9core_arch11loongarch643lsx9generated16lsx_vpickve2gr_b17hbf4a6d8f95630043E(ptr noalias nocapture noundef nonnull align 16 dereferenceable(16) %0)
  ret i32 %_0
}

; Function Attrs: nofree nosync nounwind memory(none)
declare i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8>, i32 immarg) unnamed_addr #3

; Function Attrs: nofree nosync nounwind memory(none)
declare <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32) unnamed_addr #3

attributes #0 = { inlinehint nofree nosync nounwind memory(argmem: read) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #1 = { inlinehint nofree nosync nounwind memory(argmem: write) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx,+lsx,+d,+f" }
attributes #2 = { nofree nosync nounwind memory(none) uwtable "target-cpu"="generic" "target-features"="+f,+d,+lsx" }
```

### Meta


`rustc --version --verbose`:
```
rustc 1.84.0-nightly (3fee0f12e 2024-11-20)
binary: rustc
commit-hash: 3fee0f12e4f595948f8f54f57c8b7a7a58127124
commit-date: 2024-11-20
host: loongarch64-unknown-linux-gnu
release: 1.84.0-nightly
LLVM version: 19.1.3
```

`rustc -Z unstable-options --print target-spec-json`:
```
{
  "arch": "loongarch64",
  "code-model": "medium",
  "crt-objects-fallback": "false",
  "crt-static-respected": true,
  "data-layout": "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128",
  "direct-access-external-data": false,
  "dynamic-linking": true,
  "env": "gnu",
  "features": "+f,+d,+lsx",
  "has-rpath": true,
  "has-thread-local": true,
  "linker-flavor": "gnu-cc",
  "llvm-abiname": "lp64d",
  "llvm-target": "loongarch64-unknown-linux-gnu",
  "max-atomic-width": 64,
  "metadata": {
    "description": "LoongArch64 Linux, LP64D ABI (kernel 5.19, glibc 2.36)",
    "host_tools": true,
    "std": true,
    "tier": 2
  },
  "os": "linux",
  "position-independent-executables": true,
  "relro-level": "full",
  "supported-sanitizers": [
    "address",
    "leak",
    "memory",
    "thread",
    "cfi"
  ],
  "supported-split-debuginfo": [
    "packed",
    "unpacked",
    "off"
  ],
  "supports-xray": true,
  "target-family": [
    "unix"
  ],
  "target-pointer-width": "64"
}
```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[LoongArch] SIMD intrinsics not fully inlined in caller with target feature globally enabled #133281

Meta

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

[LoongArch] SIMD intrinsics not fully inlined in caller with target feature globally enabled #133281

Description

Meta

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions