-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
release/19.x: [loongarch][DAG][FREEZE] Fix crash when FREEZE a half(f16) type on loongarch (#107791) #109093
Conversation
@arsenm What do you think about merging this PR to the release branch? |
@llvm/pr-subscribers-backend-loongarch Author: None (llvmbot) Changes: Backport 13280d9. Requested by: @nikic. Full diff: https://github.com/llvm/llvm-project/pull/109093.diff 2 Files Affected:
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index fc5b36c2124e01..267837add575dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -332,6 +332,8 @@ class LoongArchTargetLowering : public TargetLowering {
bool isEligibleForTailCallOptimization(
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
const SmallVectorImpl<CCValAssign> &ArgLocs) const;
+
+ bool softPromoteHalfType() const override { return true; }
};
} // end namespace llvm
diff --git a/llvm/test/CodeGen/LoongArch/fp16-promote.ll b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
index 75f920b43a06ce..03965ac81f3763 100644
--- a/llvm/test/CodeGen/LoongArch/fp16-promote.ll
+++ b/llvm/test/CodeGen/LoongArch/fp16-promote.ll
@@ -126,42 +126,40 @@ define void @test_fptrunc_double(double %d, ptr %p) nounwind {
define half @test_fadd_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fadd_reg:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: move $a0, $a1
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: fmov.s $fa0, $fa1
-; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmov.s $fs1, $fa0
-; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fadd.s $fa0, $fa0, $fs1
-; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: test_fadd_reg:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -32
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: fmov.s $fa0, $fa1
-; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: move $a0, $fp
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmov.s $fs1, $fa0
-; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fadd.s $fa0, $fa0, $fs1
-; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
@@ -177,16 +175,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT: move $fp, $a1
-; LA32-NEXT: move $s0, $a0
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.hu $s0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a1, 0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: move $a0, $s0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA32-NEXT: fadd.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -201,16 +199,16 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT: move $fp, $a1
-; LA64-NEXT: move $s0, $a0
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: ld.hu $s0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a1, 0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: move $a0, $s0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fadd.s $fa0, $fs0, $fa0
+; LA64-NEXT: fadd.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -227,42 +225,40 @@ define void @test_fadd_mem(ptr %p, ptr %q) nounwind {
define half @test_fmul_reg(half %a, half %b) nounwind {
; LA32-LABEL: test_fmul_reg:
; LA32: # %bb.0:
-; LA32-NEXT: addi.w $sp, $sp, -32
-; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill
-; LA32-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA32-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: st.w $fp, $sp, 8 # 4-byte Folded Spill
+; LA32-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: move $a0, $a1
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: fmov.s $fa0, $fa1
-; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: move $a0, $fp
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmov.s $fs1, $fa0
-; LA32-NEXT: fmov.s $fa0, $fs0
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmul.s $fa0, $fa0, $fs1
-; LA32-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA32-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
-; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload
-; LA32-NEXT: addi.w $sp, $sp, 32
+; LA32-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
+; LA32-NEXT: ld.w $fp, $sp, 8 # 4-byte Folded Reload
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
; LA32-NEXT: ret
;
; LA64-LABEL: test_fmul_reg:
; LA64: # %bb.0:
; LA64-NEXT: addi.d $sp, $sp, -32
; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs0, $sp, 16 # 8-byte Folded Spill
-; LA64-NEXT: fst.d $fs1, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
+; LA64-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: move $a0, $a1
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: fmov.s $fa0, $fa1
-; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: move $a0, $fp
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmov.s $fs1, $fa0
-; LA64-NEXT: fmov.s $fa0, $fs0
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmul.s $fa0, $fa0, $fs1
-; LA64-NEXT: fld.d $fs1, $sp, 8 # 8-byte Folded Reload
-; LA64-NEXT: fld.d $fs0, $sp, 16 # 8-byte Folded Reload
+; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload
; LA64-NEXT: addi.d $sp, $sp, 32
; LA64-NEXT: ret
@@ -278,16 +274,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill
; LA32-NEXT: st.w $s0, $sp, 20 # 4-byte Folded Spill
; LA32-NEXT: fst.d $fs0, $sp, 8 # 8-byte Folded Spill
-; LA32-NEXT: move $fp, $a1
-; LA32-NEXT: move $s0, $a0
-; LA32-NEXT: ld.hu $a0, $a0, 0
+; LA32-NEXT: move $fp, $a0
+; LA32-NEXT: ld.hu $s0, $a0, 0
+; LA32-NEXT: ld.hu $a0, $a1, 0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
; LA32-NEXT: fmov.s $fs0, $fa0
-; LA32-NEXT: ld.hu $a0, $fp, 0
+; LA32-NEXT: move $a0, $s0
; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA32-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA32-NEXT: fmul.s $fa0, $fa0, $fs0
; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA32-NEXT: st.h $a0, $s0, 0
+; LA32-NEXT: st.h $a0, $fp, 0
; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
; LA32-NEXT: ld.w $s0, $sp, 20 # 4-byte Folded Reload
; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload
@@ -302,16 +298,16 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill
; LA64-NEXT: st.d $s0, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT: fst.d $fs0, $sp, 0 # 8-byte Folded Spill
-; LA64-NEXT: move $fp, $a1
-; LA64-NEXT: move $s0, $a0
-; LA64-NEXT: ld.hu $a0, $a0, 0
+; LA64-NEXT: move $fp, $a0
+; LA64-NEXT: ld.hu $s0, $a0, 0
+; LA64-NEXT: ld.hu $a0, $a1, 0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
; LA64-NEXT: fmov.s $fs0, $fa0
-; LA64-NEXT: ld.hu $a0, $fp, 0
+; LA64-NEXT: move $a0, $s0
; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
-; LA64-NEXT: fmul.s $fa0, $fs0, $fa0
+; LA64-NEXT: fmul.s $fa0, $fa0, $fs0
; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
-; LA64-NEXT: st.h $a0, $s0, 0
+; LA64-NEXT: st.h $a0, $fp, 0
; LA64-NEXT: fld.d $fs0, $sp, 0 # 8-byte Folded Reload
; LA64-NEXT: ld.d $s0, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload
@@ -324,3 +320,61 @@ define void @test_fmul_mem(ptr %p, ptr %q) nounwind {
store half %r, ptr %p
ret void
}
+
+define half @freeze_half_undef() nounwind {
+; LA32-LABEL: freeze_half_undef:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: movgr2fr.w $fa0, $zero
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: freeze_half_undef:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: movgr2fr.w $fa0, $zero
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %y1 = freeze half undef
+ %t1 = fadd half %y1, %y1
+ ret half %t1
+}
+
+define half @freeze_half_poison(half %maybe.poison) nounwind {
+; LA32-LABEL: freeze_half_poison:
+; LA32: # %bb.0:
+; LA32-NEXT: addi.w $sp, $sp, -16
+; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill
+; LA32-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA32-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA32-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload
+; LA32-NEXT: addi.w $sp, $sp, 16
+; LA32-NEXT: ret
+;
+; LA64-LABEL: freeze_half_poison:
+; LA64: # %bb.0:
+; LA64-NEXT: addi.d $sp, $sp, -16
+; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill
+; LA64-NEXT: bl %plt(__gnu_h2f_ieee)
+; LA64-NEXT: fadd.s $fa0, $fa0, $fa0
+; LA64-NEXT: bl %plt(__gnu_f2h_ieee)
+; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload
+; LA64-NEXT: addi.d $sp, $sp, 16
+; LA64-NEXT: ret
+ %y1 = freeze half %maybe.poison
+ %t1 = fadd half %y1, %y1
+ ret half %t1
+}
|
Do I understand correctly that a side effect of this change is to change the half float ABI on loongarch from passing via FP regs to passing via GPR regs? |
llvm-project/llvm/lib/CodeGen/TargetLoweringBase.cpp Lines 1356 to 1374 in 43c9203
From the logic above, the answer seems to be yes. Because with this patch, the following functions' return values for loongarch are
llvm-project/llvm/include/llvm/CodeGen/TargetLowering.h Lines 519 to 530 in 43c9203
|
Are there still questionmarks about this one @nikic or can it be approved and merged? |
I'm concerned about changing the call ABI in a stable release, even if the ABI itself is not specified. It means that if you compile an object with LLVM 19.1.0, you may not be able to link it with an object compiled with LLVM 19.1.1. It's also worth noting that the ABI used here may not be final either: #109368 proposed to change it again. |
Currently, Rust's compiler-builtins has marked fp16 as available for loongarch64, but in fact, the functionality is broken. Even with this patch, it is not optimal. Subjectively, I hope these patches can be backported to LLVM 19 to avoid ABI incompatibility issues across multiple versions. If we don’t break anything unrelated to fp16, it won’t be a bad thing to work on fixing fp16, as it was already broken to begin with. Thanks. |
This patch is fixing the stated issue in a roundabout way. You do not need to change the ABI or half promotion strategy just to support freeze on half values |
fp16 support is generally quite broken outside of a few targets -- from a Rust perspective, we'd prefer full support in LLVM 20 rather than backporting things piecemeal to LLVM 19. Especially if it's going to change the ABI, which I think means we'd need LLVM-patch-version-specific handling in compiler-builtins. |
If you are referring to https://github.com/rust-lang/compiler-builtins/blob/bb18ce58c2b5fec081a5dd3553aa960a36b8de5c/configure.rs#L51-L91, we only disable f16/f128 on platforms where LLVM 19 actually has a crash or produces code that can't be linked. Quite a few of the targets that aren't disabled there have some form of ABI issue - so we just build the symbols but don't actually test with them. (the config for what gets tested is at https://github.com/rust-lang/rust/blob/2bd1e894efde3b6be857ad345914a3b1cea51def/library/std/build.rs#L81-L186) |
Sounds to me like we don't have to accept this patch into the 19.1 release. Please re-open and argue if you don't agree with that assessment. |
Update: #109368 (comment) I suggest continuing this PR to ensure that fp16 support is functional on the release/19.x. |
…ongarch (llvm#107791) For zig with LLVM 19.1.0rc4, we are seeing the following error when bootstrapping a `loongarch64-linux-musl` target. ziglang/zig-bootstrap#164 (comment) It seems that this issue is caused by `PromoteFloatResult` not handling the FREEZE op on loongarch. Here is the reproduction of the error: https://godbolt.org/z/PPfvWjjG5 ~~This patch adds the FREEZE OP handling with `PromoteFloatRes_UnaryOp` and adds a test case.~~ This patch changes loongarch's way of floating point promotion to soft promotion to avoid this problem. See: loongarch's handling of `half`: - llvm#93894 - llvm#94456 Also see: other float promotion FREEZE handling - llvm@0019c2f (cherry picked from commit 13280d9)
After deciding that FPU does not support fp16, there’s no longer any ABI-breaking impact. Without this patch, LoongArch’s software emulation for fp16 operations still has correctness issues. This is why downstream software depends on it, which is the reason for my request. Thanks. |
Speaking only from a Rust perspective, don't worry too much about making breaking changes that fix […]. I don't know what other frontends expose this type, but the commit mentions Zig. If it is preferable to keep using float registers, it sounds like there is another fix for the precision issue rather than changing the ABI #97975 (comment). I don't think there is a test for the precision yet. |
I would like to reiterate that this is a roundabout fix for the assertion in question. It sends it down a different path that happens to avoid it. It is not necessary to make this ABI change to fix the assertion. |
Disable `f16` for LoongArch64 due to incorrect code generation on LLVM 19, which causes failures in `testcrate/tests/conv.rs`. This workaround will remain in place until llvm/llvm-project#109093 is merged or we upgrade to LLVM 20.
Although the original intent of this patch was to resolve an assertion issue (ziglang/zig-bootstrap#164 (comment)), it actually addresses two miscompilation issues (#97975 #97981) as well. When the backport was initially proposed, I believed that an ABI change would be necessary to ensure compatibility with future hardware supporting fp16. However, it's now clear that no such change will be required (#109368 (comment)). Given that this is primarily a bug fix, could we proceed with merging this backport into 19? |
I'd still prefer not to backport this. We're changing many targets to use softPromoteHalfType in LLVM 20 (hopefully all if someone gets around to it...), with the ABI changes that implies. I don't think it makes sense to backport this just for loongarch, and I also don't want to backport this for all targets where this change is made (esp as the changes are trickling in slowly over time). |
I agree with you. It's reasonable. Thanks for your feedback. |
Backport 13280d9
Requested by: @nikic