-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RuntimeLibCalls] Consistently disable unavailable libcalls #116214
Conversation
The logic for marking runtime libcalls unavailable currently duplicates essentially the same logic for some random subset of targets, where someone reported an issue and then someone went and fixed the issue for that specific target only. However, the availability for most of these is completely target independent. In particular: * MULO_I128 is never available in libgcc * Various I128 libcalls are only available for 64-bit targets in libgcc * powi is never available in MSVCRT Unify the logic for these, so we don't miss any targets. This fixes llvm#16778 on AArch64, which is one of the targets that was previously missed in this logic.
@llvm/pr-subscribers-llvm-ir Author: Nikita Popov (nikic) ChangesThe logic for marking runtime libcalls unavailable currently duplicates essentially the same logic for some random subset of targets, where someone reported an issue and then someone went and fixed the issue for that specific target only. However, the availability for most of these is completely target independent. In particular:
Unify the logic for these, so we don't miss any targets. This fixes #16778 on AArch64, which is one of the targets that was previously missed in this logic. Full diff: https://github.com/llvm/llvm-project/pull/116214.diff 2 Files Affected:
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 06167559a77697..e38fce764b6403 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -205,14 +205,6 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
}
- if (TT.isAArch64()) {
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
- }
-
// Disable most libcalls on AMDGPU.
if (TT.isAMDGPU()) {
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
@@ -228,20 +220,10 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
}
- if (TT.isARM() || TT.isThumb()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MULO_I128, nullptr);
-
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
+ if (TT.isOSMSVCRT()) {
+ // MSVCRT doesn't have powi; fall back to pow
+ setLibcallName(RTLIB::POWI_F32, nullptr);
+ setLibcallName(RTLIB::POWI_F64, nullptr);
}
if (TT.getArch() == Triple::ArchType::avr) {
@@ -262,37 +244,9 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::UREM_I32, nullptr);
}
- if (TT.getArch() == Triple::ArchType::hexagon) {
- // These cause problems when the shift amount is non-constant.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
-
- if (TT.isLoongArch()) {
- if (!TT.isLoongArch64()) {
- // Set libcalls.
- setLibcallName(RTLIB::MUL_I128, nullptr);
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isMIPS32()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isPPC()) {
- if (!TT.isPPC64()) {
- // These libcalls are not available in 32-bit.
+ if (!TT.isWasm()) {
+ // These libcalls are only available in compiler-rt, not libgcc.
+ if (TT.isArch32Bit()) {
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
@@ -301,52 +255,4 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
}
setLibcallName(RTLIB::MULO_I128, nullptr);
}
-
- if (TT.isRISCV32()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
-
- if (TT.isSPARC()) {
- if (!TT.isSPARC64()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isSystemZ()) {
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
-
- if (TT.isX86()) {
- if (TT.getArch() == Triple::ArchType::x86) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
-
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I128, nullptr);
-
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
- }
}
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 7c1d9141421fd3..9ae906249826d3 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -355,15 +355,41 @@ define i128 @i128_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: ldr x8, [sp, #8]
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x15, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x16, x0, x3
+; CHECK-NEXT: adc x10, x10, x8
+; CHECK-NEXT: adds x8, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: umulh x13, x1, x3
+; CHECK-NEXT: mov x1, x8
+; CHECK-NEXT: cinc x14, x16, hs
+; CHECK-NEXT: adds x12, x12, x14
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: cset w14, hs
+; CHECK-NEXT: adds x11, x11, x12
+; CHECK-NEXT: asr x12, x8, #63
+; CHECK-NEXT: adc x13, x13, x14
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: adc x10, x13, x10
+; CHECK-NEXT: cmp x9, x12
+; CHECK-NEXT: ccmp x10, x12, #0, eq
; CHECK-NEXT: cset w2, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -378,15 +404,41 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: ldr x8, [sp, #8]
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x15, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x16, x0, x3
+; CHECK-NEXT: adc x10, x10, x8
+; CHECK-NEXT: adds x8, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: umulh x13, x1, x3
+; CHECK-NEXT: mov x1, x8
+; CHECK-NEXT: cinc x14, x16, hs
+; CHECK-NEXT: adds x12, x12, x14
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: cset w14, hs
+; CHECK-NEXT: adds x11, x11, x12
+; CHECK-NEXT: asr x12, x8, #63
+; CHECK-NEXT: adc x13, x13, x14
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: adc x10, x13, x10
+; CHECK-NEXT: cmp x9, x12
+; CHECK-NEXT: ccmp x10, x12, #0, eq
; CHECK-NEXT: cset w2, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -400,26 +452,46 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
define i128 @i128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: mov x19, x3
-; CHECK-NEXT: mov x20, x1
-; CHECK-NEXT: str xzr, [sp, #8]
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: eor x8, x19, x20
-; CHECK-NEXT: ldr x9, [sp, #8]
-; CHECK-NEXT: asr x8, x8, #63
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp x9, #0
-; CHECK-NEXT: eor x10, x8, #0x7fffffffffffffff
-; CHECK-NEXT: csinv x0, x0, x8, eq
-; CHECK-NEXT: csel x1, x10, x1, ne
-; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x16, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x15, x0, x3
+; CHECK-NEXT: adc x8, x10, x8
+; CHECK-NEXT: adds x10, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x17, x1, x3
+; CHECK-NEXT: adds x10, x16, x10
+; CHECK-NEXT: umulh x11, x1, x3
+; CHECK-NEXT: cinc x13, x15, hs
+; CHECK-NEXT: adds x12, x12, x13
+; CHECK-NEXT: cset w13, hs
+; CHECK-NEXT: adds x12, x17, x12
+; CHECK-NEXT: adc x11, x11, x13
+; CHECK-NEXT: adds x9, x12, x9
+; CHECK-NEXT: asr x12, x10, #63
+; CHECK-NEXT: mul x13, x0, x2
+; CHECK-NEXT: adc x8, x11, x8
+; CHECK-NEXT: eor x11, x3, x1
+; CHECK-NEXT: eor x8, x8, x12
+; CHECK-NEXT: eor x9, x9, x12
+; CHECK-NEXT: asr x11, x11, #63
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: eor x9, x11, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel x1, x9, x10, ne
+; CHECK-NEXT: csinv x0, x13, x11, eq
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
|
@llvm/pr-subscribers-backend-aarch64 Author: Nikita Popov (nikic) ChangesThe logic for marking runtime libcalls unavailable currently duplicates essentially the same logic for some random subset of targets, where someone reported an issue and then someone went and fixed the issue for that specific target only. However, the availability for most of these is completely target independent. In particular:
Unify the logic for these, so we don't miss any targets. This fixes #16778 on AArch64, which is one of the targets that was previously missed in this logic. Full diff: https://github.com/llvm/llvm-project/pull/116214.diff 2 Files Affected:
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index 06167559a77697..e38fce764b6403 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -205,14 +205,6 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::FREXP_PPCF128, nullptr);
}
- if (TT.isAArch64()) {
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
- }
-
// Disable most libcalls on AMDGPU.
if (TT.isAMDGPU()) {
for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
@@ -228,20 +220,10 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
}
- if (TT.isARM() || TT.isThumb()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MULO_I128, nullptr);
-
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
+ if (TT.isOSMSVCRT()) {
+ // MSVCRT doesn't have powi; fall back to pow
+ setLibcallName(RTLIB::POWI_F32, nullptr);
+ setLibcallName(RTLIB::POWI_F64, nullptr);
}
if (TT.getArch() == Triple::ArchType::avr) {
@@ -262,37 +244,9 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
setLibcallName(RTLIB::UREM_I32, nullptr);
}
- if (TT.getArch() == Triple::ArchType::hexagon) {
- // These cause problems when the shift amount is non-constant.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
-
- if (TT.isLoongArch()) {
- if (!TT.isLoongArch64()) {
- // Set libcalls.
- setLibcallName(RTLIB::MUL_I128, nullptr);
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isMIPS32()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isPPC()) {
- if (!TT.isPPC64()) {
- // These libcalls are not available in 32-bit.
+ if (!TT.isWasm()) {
+ // These libcalls are only available in compiler-rt, not libgcc.
+ if (TT.isArch32Bit()) {
setLibcallName(RTLIB::SHL_I128, nullptr);
setLibcallName(RTLIB::SRL_I128, nullptr);
setLibcallName(RTLIB::SRA_I128, nullptr);
@@ -301,52 +255,4 @@ void RuntimeLibcallsInfo::initLibcalls(const Triple &TT) {
}
setLibcallName(RTLIB::MULO_I128, nullptr);
}
-
- if (TT.isRISCV32()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
-
- if (TT.isSPARC()) {
- if (!TT.isSPARC64()) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
- setLibcallName(RTLIB::MULO_I128, nullptr);
- }
-
- if (TT.isSystemZ()) {
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- }
-
- if (TT.isX86()) {
- if (TT.getArch() == Triple::ArchType::x86) {
- // These libcalls are not available in 32-bit.
- setLibcallName(RTLIB::SHL_I128, nullptr);
- setLibcallName(RTLIB::SRL_I128, nullptr);
- setLibcallName(RTLIB::SRA_I128, nullptr);
- setLibcallName(RTLIB::MUL_I128, nullptr);
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I64, nullptr);
- }
-
- // The MULO libcall is not part of libgcc, only compiler-rt.
- setLibcallName(RTLIB::MULO_I128, nullptr);
-
- if (TT.isOSMSVCRT()) {
- // MSVCRT doesn't have powi; fall back to pow
- setLibcallName(RTLIB::POWI_F32, nullptr);
- setLibcallName(RTLIB::POWI_F64, nullptr);
- }
- }
}
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
index 7c1d9141421fd3..9ae906249826d3 100644
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -355,15 +355,41 @@ define i128 @i128_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_checked_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: ldr x8, [sp, #8]
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x15, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x16, x0, x3
+; CHECK-NEXT: adc x10, x10, x8
+; CHECK-NEXT: adds x8, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: umulh x13, x1, x3
+; CHECK-NEXT: mov x1, x8
+; CHECK-NEXT: cinc x14, x16, hs
+; CHECK-NEXT: adds x12, x12, x14
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: cset w14, hs
+; CHECK-NEXT: adds x11, x11, x12
+; CHECK-NEXT: asr x12, x8, #63
+; CHECK-NEXT: adc x13, x13, x14
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: adc x10, x13, x10
+; CHECK-NEXT: cmp x9, x12
+; CHECK-NEXT: ccmp x10, x12, #0, eq
; CHECK-NEXT: cset w2, eq
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -378,15 +404,41 @@ define { i128, i8 } @i128_checked_mul(i128 %x, i128 %y) {
define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_overflowing_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp x30, xzr, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: ldr x8, [sp, #8]
-; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x15, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x16, x0, x3
+; CHECK-NEXT: adc x10, x10, x8
+; CHECK-NEXT: adds x8, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x11, x1, x3
+; CHECK-NEXT: adds x8, x15, x8
+; CHECK-NEXT: umulh x13, x1, x3
+; CHECK-NEXT: mov x1, x8
+; CHECK-NEXT: cinc x14, x16, hs
+; CHECK-NEXT: adds x12, x12, x14
+; CHECK-NEXT: mul x0, x0, x2
+; CHECK-NEXT: cset w14, hs
+; CHECK-NEXT: adds x11, x11, x12
+; CHECK-NEXT: asr x12, x8, #63
+; CHECK-NEXT: adc x13, x13, x14
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: adc x10, x13, x10
+; CHECK-NEXT: cmp x9, x12
+; CHECK-NEXT: ccmp x10, x12, #0, eq
; CHECK-NEXT: cset w2, ne
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
@@ -400,26 +452,46 @@ define { i128, i8 } @i128_overflowing_mul(i128 %x, i128 %y) {
define i128 @i128_saturating_mul(i128 %x, i128 %y) {
; CHECK-LABEL: i128_saturating_mul:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: add x4, sp, #8
-; CHECK-NEXT: mov x19, x3
-; CHECK-NEXT: mov x20, x1
-; CHECK-NEXT: str xzr, [sp, #8]
-; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: eor x8, x19, x20
-; CHECK-NEXT: ldr x9, [sp, #8]
-; CHECK-NEXT: asr x8, x8, #63
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp x9, #0
-; CHECK-NEXT: eor x10, x8, #0x7fffffffffffffff
-; CHECK-NEXT: csinv x0, x0, x8, eq
-; CHECK-NEXT: csel x1, x10, x1, ne
-; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: asr x8, x1, #63
+; CHECK-NEXT: asr x11, x3, #63
+; CHECK-NEXT: umulh x13, x0, x2
+; CHECK-NEXT: mul x9, x2, x8
+; CHECK-NEXT: umulh x10, x2, x8
+; CHECK-NEXT: umulh x12, x11, x0
+; CHECK-NEXT: mul x14, x1, x2
+; CHECK-NEXT: add x10, x10, x9
+; CHECK-NEXT: madd x8, x3, x8, x10
+; CHECK-NEXT: madd x10, x11, x1, x12
+; CHECK-NEXT: mul x11, x11, x0
+; CHECK-NEXT: umulh x12, x1, x2
+; CHECK-NEXT: mul x16, x0, x3
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x15, x0, x3
+; CHECK-NEXT: adc x8, x10, x8
+; CHECK-NEXT: adds x10, x14, x13
+; CHECK-NEXT: cinc x12, x12, hs
+; CHECK-NEXT: mul x17, x1, x3
+; CHECK-NEXT: adds x10, x16, x10
+; CHECK-NEXT: umulh x11, x1, x3
+; CHECK-NEXT: cinc x13, x15, hs
+; CHECK-NEXT: adds x12, x12, x13
+; CHECK-NEXT: cset w13, hs
+; CHECK-NEXT: adds x12, x17, x12
+; CHECK-NEXT: adc x11, x11, x13
+; CHECK-NEXT: adds x9, x12, x9
+; CHECK-NEXT: asr x12, x10, #63
+; CHECK-NEXT: mul x13, x0, x2
+; CHECK-NEXT: adc x8, x11, x8
+; CHECK-NEXT: eor x11, x3, x1
+; CHECK-NEXT: eor x8, x8, x12
+; CHECK-NEXT: eor x9, x9, x12
+; CHECK-NEXT: asr x11, x11, #63
+; CHECK-NEXT: orr x8, x9, x8
+; CHECK-NEXT: eor x9, x11, #0x7fffffffffffffff
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel x1, x9, x10, ne
+; CHECK-NEXT: csinv x0, x13, x11, eq
; CHECK-NEXT: ret
%1 = tail call { i128, i1 } @llvm.smul.with.overflow.i128(i128 %x, i128 %y)
%2 = extractvalue { i128, i1 } %1, 0
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for looking into this. The CI shows an XCore test failing but only on Windows.
I've fixed this by using |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG, thanks.
The logic for marking runtime libcalls unavailable currently duplicates essentially the same logic for some random subset of targets, where someone reported an issue and then someone went and fixed the issue for that specific target only. However, the availability for most of these is completely target independent. In particular:
Unify the logic for these, so we don't miss any targets. This fixes #16778 on AArch64, which is one of the targets that was previously missed in this logic.