- 
                Notifications
    You must be signed in to change notification settings 
- Fork 15k
          [Arm64EC] Add support for half
          #152843
        
          New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
  
    [Arm64EC] Add support for half
  
  #152843
              
            Conversation
| @llvm/pr-subscribers-backend-aarch64 Author: Trevor Gross (tgross35) Changes
 MSVC does not yet support `_Float16`, so for now this will remain an LLVM-only extension. Full diff: https://github.com/llvm/llvm-project/pull/152843.diff 6 Files Affected: 
 diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index ad8368e1692be..aa52b71b862ab 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -316,6 +316,11 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
                         ThunkArgTranslation::PointerIndirection};
   };
 
+  if (T->isHalfTy()) {
+    Out << "h";
+    return direct(T);
+  }
+
   if (T->isFloatTy()) {
     Out << "f";
     return direct(T);
@@ -327,8 +332,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
   }
 
   if (T->isFloatingPointTy()) {
-    report_fatal_error(
-        "Only 32 and 64 bit floating points are supported for ARM64EC thunks");
+    report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
+                       "for ARM64EC thunks");
   }
 
   auto &DL = M->getDataLayout();
@@ -342,8 +347,15 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
     uint64_t ElementCnt = T->getArrayNumElements();
     uint64_t ElementSizePerBytes = DL.getTypeSizeInBits(ElementTy) / 8;
     uint64_t TotalSizeBytes = ElementCnt * ElementSizePerBytes;
-    if (ElementTy->isFloatTy() || ElementTy->isDoubleTy()) {
-      Out << (ElementTy->isFloatTy() ? "F" : "D") << TotalSizeBytes;
+    if (ElementTy->isHalfTy() || ElementTy->isFloatTy() ||
+        ElementTy->isDoubleTy()) {
+      if (ElementTy->isHalfTy())
+        Out << "H";
+      else if (ElementTy->isFloatTy())
+        Out << "F";
+      else if (ElementTy->isDoubleTy())
+        Out << "D";
+      Out << TotalSizeBytes;
       if (Alignment.value() >= 16 && !Ret)
         Out << "a" << Alignment.value();
       if (TotalSizeBytes <= 8) {
@@ -355,8 +367,8 @@ ThunkArgInfo AArch64Arm64ECCallLowering::canonicalizeThunkType(
         return pointerIndirection(T);
       }
     } else if (T->isFloatingPointTy()) {
-      report_fatal_error("Only 32 and 64 bit floating points are supported for "
-                         "ARM64EC thunks");
+      report_fatal_error("Only 16, 32, and 64 bit floating points are supported "
+                         "for ARM64EC thunks");
     }
   }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 6aeeeed94543d..8b70c2738e3f4 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -85,10 +85,10 @@ define i64 @simple_integers(i8, i16, i32, i64) nounwind {
   ret i64 0
 }
 
-; NOTE: Only float and double are supported.
-define double @simple_floats(float, double) nounwind {
-; CHECK-LABEL:    .def    $ientry_thunk$cdecl$d$fd;
-; CHECK:          .section        .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+define double @simple_floats(half, float, double) nounwind {
+; CHECK-LABEL:    .def    $ientry_thunk$cdecl$d$hfd;
+; CHECK:          .section        .wowthk$aa,"xr",discard,$ientry_thunk$cdecl$d$hfd
 ; CHECK:          // %bb.0:
 ; CHECK-NEXT:     stp     q6, q7, [sp, #-176]!            // 32-byte Folded Spill
 ; CHECK-NEXT:     .seh_save_any_reg_px    q6, 176
@@ -600,7 +600,7 @@ start:
 ; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$i8$i8i8i8i8
 ; CHECK-NEXT:     .word   1
 ; CHECK-NEXT:     .symidx "#simple_floats"
-; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$d$fd
+; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$d$hfd
 ; CHECK-NEXT:     .word   1
 ; CHECK-NEXT:     .symidx "#has_varargs"
 ; CHECK-NEXT:     .symidx $ientry_thunk$cdecl$v$varargs
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
index cba7a8100930f..22bffc7a0a33d 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-exit-thunks.ll
@@ -93,10 +93,10 @@ declare i64 @simple_integers(i8, i16, i32, i64) nounwind;
 ; CHECK-NEXT:     .seh_endfunclet
 ; CHECK-NEXT:     .seh_endproc
 
-; NOTE: Only float and double are supported.
-declare double @simple_floats(float, double) nounwind;
-; CHECK-LABEL:    .def    $iexit_thunk$cdecl$d$fd;
-; CHECK:          .section        .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$fd
+; NOTE: Only half, float, and double are supported.
+declare double @simple_floats(half, float, double) nounwind;
+; CHECK-LABEL:    .def    $iexit_thunk$cdecl$d$hfd;
+; CHECK:          .section        .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$d$hfd
 ; CHECK:          // %bb.0:
 ; CHECK-NEXT:     sub     sp, sp, #48
 ; CHECK-NEXT:     .seh_stackalloc 48
@@ -129,8 +129,8 @@ declare double @simple_floats(float, double) nounwind;
 ; CHECK-NEXT:     adrp    x11, simple_floats
 ; CHECK-NEXT:     add     x11, x11, :lo12:simple_floats
 ; CHECK-NEXT:     ldr     x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT:     adrp    x10, $iexit_thunk$cdecl$d$fd
-; CHECK-NEXT:     add     x10, x10, :lo12:$iexit_thunk$cdecl$d$fd
+; CHECK-NEXT:     adrp    x10, $iexit_thunk$cdecl$d$hfd
+; CHECK-NEXT:     add     x10, x10, :lo12:$iexit_thunk$cdecl$d$hfd
 ; CHECK-NEXT:     blr     x8
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldr     x30, [sp], #16                  // 8-byte Folded Reload
@@ -282,33 +282,36 @@ declare void @has_aligned_sret(ptr align 32 sret(%TSRet)) nounwind;
 ; CHECK:          .seh_endfunclet
 ; CHECK:          .seh_endproc
 
-declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
-; CHECK-LABEL:    .def    $iexit_thunk$cdecl$m2$m2F8;
-; CHECK:          .section        .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2F8
+declare [2 x i8] @small_array([2 x i8], [2 x half], [2 x float]) nounwind;
+; CHECK-LABEL:    .def    $iexit_thunk$cdecl$m2$m2mF8;
+; CHECK:          .section        .wowthk$aa,"xr",discard,$iexit_thunk$cdecl$m2$m2mF8
 ; CHECK:          // %bb.0:
-; CHECK-NEXT:     sub     sp, sp, #64
-; CHECK-NEXT:     .seh_stackalloc 64
-; CHECK-NEXT:     stp     x29, x30, [sp, #48]             // 16-byte Folded Spill
-; CHECK-NEXT:     .seh_save_fplr  48
-; CHECK-NEXT:     add     x29, sp, #48
-; CHECK-NEXT:     .seh_add_fp     48
+; CHECK-NEXT:     sub     sp, sp, #80
+; CHECK-NEXT:     .seh_stackalloc 80
+; CHECK-NEXT:     stp     x29, x30, [sp, #64]             // 16-byte Folded Spill
+; CHECK-NEXT:     .seh_save_fplr  64
+; CHECK-NEXT:     add     x29, sp, #64
+; CHECK-NEXT:     .seh_add_fp     64
 ; CHECK-NEXT:     .seh_endprologue
-; CHECK-NEXT:     sturb   w1, [x29, #-1]
-; CHECK-NEXT:     adrp    x8, __os_arm64x_dispatch_call_no_redirect
-; CHECK-NEXT:     sturb   w0, [x29, #-2]
-; CHECK-NEXT:     ldr     x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
-; CHECK-NEXT:     stp     s0, s1, [x29, #-12]
-; CHECK-NEXT:     ldurh   w0, [x29, #-2]
-; CHECK-NEXT:     ldur    x1, [x29, #-12]
-; CHECK-NEXT:     blr     x16
-; CHECK-NEXT:     mov     w0, w8
-; CHECK-NEXT:     sturh   w8, [x29, #-14]
-; CHECK-NEXT:     ubfx    w1, w8, #8, #8
+; CHECK-NEXT:     sturb	w0, [x29, #-2]
+; CHECK-NEXT:     adrp	x8, __os_arm64x_dispatch_call_no_redirect
+; CHECK-NEXT:     sturb	w1, [x29, #-1]
+; CHECK-NEXT:     ldr	x16, [x8, :lo12:__os_arm64x_dispatch_call_no_redirect]
+; CHECK-NEXT:     stur	h0, [x29, #-6]
+; CHECK-NEXT:     ldurh	w0, [x29, #-2]
+; CHECK-NEXT:     stur	h1, [x29, #-4]
+; CHECK-NEXT:     stp	s2, s3, [x29, #-16]
+; CHECK-NEXT:     ldur	w1, [x29, #-6]
+; CHECK-NEXT:     ldur	x2, [x29, #-16]
+; CHECK-NEXT:     blr	x16
+; CHECK-NEXT:     mov	w0, w8
+; CHECK-NEXT:     sturh	w8, [x29, #-18]
+; CHECK-NEXT:     ubfx	w1, w8, #8, #8
 ; CHECK-NEXT:     .seh_startepilogue
-; CHECK-NEXT:     ldp     x29, x30, [sp, #48]             // 16-byte Folded Reload
-; CHECK-NEXT:     .seh_save_fplr  48
-; CHECK-NEXT:     add     sp, sp, #64
-; CHECK-NEXT:     .seh_stackalloc 64
+; CHECK-NEXT:     ldp	x29, x30, [sp, #64]             // 16-byte Folded Reload
+; CHECK-NEXT:     .seh_save_fplr	64
+; CHECK-NEXT:     add	sp, sp, #80
+; CHECK-NEXT:     .seh_stackalloc	80
 ; CHECK-NEXT:     .seh_endepilogue
 ; CHECK-NEXT:     ret
 ; CHECK-NEXT:     .seh_endfunclet
@@ -325,8 +328,8 @@ declare [2 x i8] @small_array([2 x i8], [2 x float]) nounwind;
 ; CHECK-NEXT:     adrp    x11, small_array
 ; CHECK-NEXT:     add     x11, x11, :lo12:small_array
 ; CHECK-NEXT:     ldr     x8, [x8, :lo12:__os_arm64x_check_icall]
-; CHECK-NEXT:     adrp    x10, $iexit_thunk$cdecl$m2$m2F8
-; CHECK-NEXT:     add     x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT:     adrp    x10, $iexit_thunk$cdecl$m2$m2mF8
+; CHECK-NEXT:     add     x10, x10, :lo12:$iexit_thunk$cdecl$m2$m2mF8
 ; CHECK-NEXT:     blr     x8
 ; CHECK-NEXT:     .seh_startepilogue
 ; CHECK-NEXT:     ldr     x30, [sp], #16                  // 8-byte Folded Reload
@@ -577,7 +580,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .symidx simple_integers
 ; CHECK-NEXT:     .word   0
 ; CHECK-NEXT:     .symidx simple_floats
-; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$d$fd
+; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$d$hfd
 ; CHECK-NEXT:     .word   4
 ; CHECK-NEXT:     .symidx "#simple_floats$exit_thunk"
 ; CHECK-NEXT:     .symidx simple_floats
@@ -601,7 +604,7 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 ; CHECK-NEXT:     .symidx has_aligned_sret
 ; CHECK-NEXT:     .word   0
 ; CHECK-NEXT:     .symidx small_array
-; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$m2$m2F8
+; CHECK-NEXT:     .symidx $iexit_thunk$cdecl$m2$m2mF8
 ; CHECK-NEXT:     .word   4
 ; CHECK-NEXT:     .symidx "#small_array$exit_thunk"
 ; CHECK-NEXT:     .symidx small_array
@@ -634,14 +637,14 @@ declare <8 x i16> @large_vector(<8 x i16> %0) nounwind;
 define void @func_caller() nounwind {
   call void @no_op()
   call i64 @simple_integers(i8 0, i16 0, i32 0, i64 0)
-  call double @simple_floats(float 0.0, double 0.0)
+  call double @simple_floats(half 0.0, float 0.0, double 0.0)
   call void (...) @has_varargs()
   %c = alloca i8
   call void @has_sret(ptr sret([100 x i8]) %c)
   %aligned = alloca %TSRet, align 32
   store %TSRet { i64 0, i64 0 }, ptr %aligned, align 32
   call void @has_aligned_sret(ptr align 32 sret(%TSRet) %aligned)
-  call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x float] [float 0.0, float 0.0])
+  call [2 x i8] @small_array([2 x i8] [i8 0, i8 0], [2 x half] [half 0.0, half 0.0], [2 x float] [float 0.0, float 0.0])
   call [3 x i64] @large_array([3 x i64] [i64 0, i64 0, i64 0], [2 x double] [double 0.0, double 0.0], [2 x [2 x i64]] [[2 x i64] [i64 0, i64 0], [2 x i64] [i64 0, i64 0]])
   call %T2 @simple_struct(%T1 { i16 0 }, %T2 { i32 0, float 0.0 }, %T3 { i64 0, double 0.0 }, %T4 { i64 0, double 0.0, i8 0 })
   call <4 x i8> @small_vector(<4 x i8> <i8 0, i8 0, i8 0, i8 0>)
diff --git a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
index ee326caa77c0a..c27d3c9588b9d 100644
--- a/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/frexp-arm64ec.ll
@@ -2,6 +2,15 @@
 
 ; Separate from llvm-frexp.ll test because this errors on half cases
 
+; ARM64EC-LABEL: test_frexp_f16_i32
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#frexp"
+; ARM64EC: fcvt h0, d0
+define { half, i32 } @test_frexp_f16_i32(half %a) {
+  %result = call { half, i32 } @llvm.frexp.f16.i32(half %a)
+  ret { half, i32 } %result
+}
+
 ; ARM64EC-LABEL: test_frexp_f32_i32
 ; ARM64EC: fcvt d0, s0
 ; ARM64EC: bl "#frexp"
diff --git a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
index 1f8eeccf9c338..0fde7b95f5462 100644
--- a/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/ldexp-arm64ec.ll
@@ -3,6 +3,15 @@
 
 ; Separate from ldexp.ll test because this errors on half cases
 
+; ARM64EC-LABEL: ldexp_f16 =
+; ARM64EC: fcvt d0, h0
+; ARM64EC: bl "#ldexp"
+; ARM64EC: fcvt h0, d0
+define half @ldexp_f16(half %val, i32 %a) {
+  %call = call half @llvm.ldexp.f16(half %val, i32 %a)
+  ret half %call
+}
+
 ; ARM64EC-LABEL: ldexp_f32 =
 ; ARM64EC: fcvt d0, s0
 ; ARM64EC: bl "#ldexp"
diff --git a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
index 707159eb432ec..2e38f3c5e9a54 100644
--- a/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
+++ b/llvm/test/CodeGen/AArch64/powi-arm64ec.ll
@@ -1,8 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=arm64ec-windows-msvc < %s | FileCheck -check-prefix=ARM64EC %s
 
-declare double @llvm.powi.f64.i32(double, i32)
+declare half @llvm.powi.f16.i32(half, i32)
 declare float @llvm.powi.f32.i32(float, i32)
+declare double @llvm.powi.f64.i32(double, i32)
+
+; ARM64EC-LABEL: powi_f16
+; ARM64EC: fcvt  s0, h0
+; ARM64EC: scvtf s1, w0
+; ARM64EC: bl "#powf"
+define half @powi_f16(half %x, i32 %n) nounwind {
+  %ret = tail call half @llvm.powi.f16.i32(half %x, i32 %n)
+  ret half %ret
+}
 
 ; ARM64EC-LABEL: powi_f32
 ; ARM64EC: scvtf s1, w0
 | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
h makes sense considering the others are f and d. However, I don't know what the namespace is here, so please double check that this isn't problematic.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm going to defer to @pmsjt here, since he owns the ABI and related documentation at https://learn.microsoft.com/en-us/windows/arm/arm64ec-abi
Unfortunately, he's out for August...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The "namespace" here is arm64ec thunks. For which there is no publicly documented mangling. I think the intent might have been that you don't actually need to match MSVC's mangling, but in practice we get weird linker errors if we don't match. Which leads to a problem here: MSVC doesn't support scalar half types. So there's no documentation or code that can give us any hints here.
If we're going to pick something ourselves, I'd prefer to pick something that's very unlikely to conflict with anything Microsoft does, like __llvm_half__ or something like that.
Of course, if Microsoft gives an answer, that would be better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've changed these to __llvm_h__ and __llvm_H__ for now, this is easy enough to update later if needed.
| ✅ With the latest revision this PR passed the C/C++ code formatter. | 
abdb838    to
    9dbed2c      
    Compare
  
    | This all seems correct, but unfortunately I have no way to actually test it locally so will need some help with that. @efriedma-quic @dpaoliello would you be able to review? | 
`f16` is passed and returned in vector registers on both x86 and AArch64, the same calling convention as `f32`, so it is a straightforward type to support. The calling convention support already exists, added as part of a6065f0 ("Arm64EC entry/exit thunks, consolidated. (llvm#79067)"). Thus, add mangling and remove the error in order to make `half` work. MSVC does not yet support `_Float16`, so for now this will remain an LLVM-only extension.
9dbed2c    to
    97b7f11      
    Compare
  
            
          
                llvm/test/CodeGen/Generic/half.ll
              
                Outdated
          
        
      | ; RUN: %if xtensa-registered-target %{ llc %s -o - -mtriple=xtensa-none-elf | FileCheck %s --check-prefixes=ALL,CHECK %} | ||
|  | ||
| ; Note that arm64ec labels don't have a `:` so use `EC`, other tests do need the | ||
| ; `:` so directives with the function names don't get treated as labels. | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The labels do have a colon, it's just obscured by the quotes around the symbol name.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks, combined the labels with regex
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The "namespace" here is arm64ec thunks. For which there is no publicly documented mangling. I think the intent might have been that you don't actually need to match MSVC's mangling, but in practice we get weird linker errors if we don't match. Which leads to a problem here: MSVC doesn't support scalar half types. So there's no documentation or code that can give us any hints here.
If we're going to pick something ourselves, I'd prefer to pick something that's very unlikely to conflict with anything Microsoft does, like __llvm_half__ or something like that.
Of course, if Microsoft gives an answer, that would be better.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm fine with this as a temporary naming scheme until there's support in MSVC.
Please fix the rest of the test file.
        
          
                llvm/test/CodeGen/Generic/half.ll
              
                Outdated
          
        
      | define i16 @to_bits(half %f) nounwind { | ||
| ; ALL-LABEL: to_bits: | ||
| ; EC-LABEL: to_bits | ||
| ; ALL-LABEL: to_bits{{"}}: | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; ALL-LABEL: to_bits{{"}}: | |
| ; ALL-LABEL: to_bits{{"?}}: | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
...totally forgot to retest this file, thanks. Updated (passes locally)
        
          
                llvm/test/CodeGen/Generic/half.ll
              
                Outdated
          
        
      | define half @check_freeze(half %f) nounwind { | ||
| ; ALL-LABEL: check_freeze: | ||
| ; EC-LABEL: check_freeze | ||
| ; ALL-LABEL: check_freeze{{"}}: | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| ; ALL-LABEL: check_freeze{{"}}: | |
| ; ALL-LABEL: check_freeze{{"?}}: | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
| Thanks for reviewing; that's the last LLVM arch to support f16 🎉 (at least out of those that Rust supports). I'll need one of you to merge this. | 
`f16` is passed and returned in vector registers on both x86 and AArch64, the same calling convention as `f32`, so it is a straightforward type to support. The calling convention support already exists, added as part of a6065f0 ("Arm64EC entry/exit thunks, consolidated. (#79067)"). Thus, add mangling and remove the error in order to make `half` work.
MSVC does not yet support `_Float16`, so for now this will remain an LLVM-only extension.
Fixes the `f16` portion of #94434