-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MSVC, ARM64] Add _Copy* and _Count* intrinsics #66554
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-platform-windows
Changes
Implement the _Count* and _Copy* Windows ARM intrinsics:
Full list of intrinsics here: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
Bug: 65405
Full diff: https://github.com/llvm/llvm-project/pull/66554.diff
4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index eaae6c9ad846868..c4d34f8e15e8b8d 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -259,7 +259,6 @@ TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES,
TARGET_HEADER_BUILTIN(__break, "vi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
-
TARGET_HEADER_BUILTIN(__writex18byte, "vUNiUc", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__writex18word, "vUNiUs", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__writex18dword, "vUNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
@@ -270,6 +269,20 @@ TARGET_HEADER_BUILTIN(__readx18word, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES,
TARGET_HEADER_BUILTIN(__readx18dword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(__readx18qword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+// MSVC ARM64 bit-pattern copies between an integer and a floating-point value
+// of the same width.
+TARGET_HEADER_BUILTIN(_CopyDoubleFromInt64, "dSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CopyFloatFromInt32, "fSi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CopyInt32FromFloat, "Sif", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CopyInt64FromDouble, "SLLid", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+
+// MSVC ARM64 bit-counting intrinsics. The non-64 forms take a `long` in the
+// header; encode it with the 'N' modifier, like the __readx18*/__writex18*
+// entries above do for their `unsigned long` offset, so the operand stays
+// 32 bits wide on targets where `long` is 64 bits.
+TARGET_HEADER_BUILTIN(_CountLeadingOnes, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountLeadingOnes64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountLeadingSigns, "UiSNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountLeadingSigns64, "UiSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountLeadingZeros, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountLeadingZeros64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountOneBits, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
+
+
#undef BUILTIN
#undef LANGBUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 037a2f9f7b15322..08d09c1c3d8e9de 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10752,6 +10752,68 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return Load;
}
+  // _Copy{Double,Float}FromInt{64,32} and _CopyInt{32,64}From{Float,Double}
+  // reinterpret the operand's bits as the other same-width type; no numeric
+  // conversion takes place.
+  if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 ||
+      BuiltinID == AArch64::BI_CopyFloatFromInt32 ||
+      BuiltinID == AArch64::BI_CopyInt32FromFloat ||
+      BuiltinID == AArch64::BI_CopyInt64FromDouble) {
+    Value *Arg = EmitScalarExpr(E->getArg(0));
+    // Returning Arg as-is would yield a value of the operand's IR type rather
+    // than the call's result type, which breaks any later use of the result
+    // (e.g. arithmetic). Bitcast to the call expression's type instead.
+    llvm::Type *RetTy = ConvertType(E->getType());
+    return Builder.CreateBitCast(Arg, RetTy);
+  }
+
+  // _CountLeadingOnes[64] / _CountLeadingZeros[64]: lowered to llvm.ctlz, with
+  // the operand inverted first when counting leading ones. The result is
+  // always an i32.
+  if (BuiltinID == AArch64::BI_CountLeadingOnes ||
+      BuiltinID == AArch64::BI_CountLeadingOnes64 ||
+      BuiltinID == AArch64::BI_CountLeadingZeros ||
+      BuiltinID == AArch64::BI_CountLeadingZeros64) {
+    const bool Is64Bit = BuiltinID == AArch64::BI_CountLeadingOnes64 ||
+                         BuiltinID == AArch64::BI_CountLeadingZeros64;
+    const bool CountOnes = BuiltinID == AArch64::BI_CountLeadingOnes ||
+                           BuiltinID == AArch64::BI_CountLeadingOnes64;
+    llvm::Type *OpTy = Is64Bit ? Builder.getInt64Ty() : Builder.getInt32Ty();
+
+    // Normalize the operand to the intrinsic's nominal width, as the
+    // _CountLeadingSigns lowering does, so the result does not depend on how
+    // wide `long` is for the target. No cast is emitted when the operand
+    // already has that type.
+    Value *Arg = EmitScalarExpr(E->getArg(0));
+    if (Arg->getType() != OpTy)
+      Arg = Builder.CreateIntCast(Arg, OpTy, /*isSigned*/ false);
+
+    // Leading ones of x are the leading zeros of ~x.
+    if (CountOnes)
+      Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(OpTy));
+
+    Function *F = CGM.getIntrinsic(Intrinsic::ctlz, OpTy);
+    // Second operand false: a zero input is not treated as poison.
+    Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)});
+
+    // llvm.ctlz returns its operand type; narrow the 64-bit count to i32.
+    if (Is64Bit)
+      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+    return Result;
+  }
+
+  // _CountLeadingSigns / _CountLeadingSigns64: lowered to the AArch64 cls
+  // intrinsics on a 32-bit or 64-bit operand respectively.
+  if (BuiltinID == AArch64::BI_CountLeadingSigns ||
+      BuiltinID == AArch64::BI_CountLeadingSigns64) {
+    const bool Is64Bit = BuiltinID == AArch64::BI_CountLeadingSigns64;
+    llvm::Type *OperandTy =
+        Is64Bit ? Builder.getInt64Ty() : Builder.getInt32Ty();
+
+    Value *Operand = EmitScalarExpr(E->getArg(0));
+    Function *ClsFn = CGM.getIntrinsic(Is64Bit ? Intrinsic::aarch64_cls64
+                                               : Intrinsic::aarch64_cls);
+
+    // Sign-extend or truncate the operand when its IR type differs from the
+    // width the selected intrinsic expects.
+    if (Operand->getType() != OperandTy)
+      Operand = Builder.CreateIntCast(Operand, OperandTy, /*isSigned*/ true);
+
+    Value *Count = Builder.CreateCall(ClsFn, Operand, "cls");
+    // The 64-bit form requests an i32 result explicitly.
+    if (Is64Bit)
+      Count = Builder.CreateTrunc(Count, Builder.getInt32Ty());
+    return Count;
+  }
+
+  // _CountOneBits / _CountOneBits64: population count via llvm.ctpop. The
+  // result is always an i32.
+  if (BuiltinID == AArch64::BI_CountOneBits ||
+      BuiltinID == AArch64::BI_CountOneBits64) {
+    const bool Is64Bit = BuiltinID == AArch64::BI_CountOneBits64;
+    llvm::Type *OpTy = Is64Bit ? Builder.getInt64Ty() : Builder.getInt32Ty();
+
+    // Normalize the operand to the intrinsic's nominal width, as the
+    // _CountLeadingSigns lowering does, so the result does not depend on how
+    // wide `long` is for the target. No cast is emitted when the operand
+    // already has that type.
+    Value *ArgValue = EmitScalarExpr(E->getArg(0));
+    if (ArgValue->getType() != OpTy)
+      ArgValue = Builder.CreateIntCast(ArgValue, OpTy, /*isSigned*/ false);
+
+    Function *F = CGM.getIntrinsic(Intrinsic::ctpop, OpTy);
+    Value *Result = Builder.CreateCall(F, ArgValue);
+
+    // llvm.ctpop returns its operand type; narrow the 64-bit count to i32.
+    if (Is64Bit)
+      Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
+    return Result;
+  }
+
// Handle MSVC intrinsics before argument evaluation to prevent double
// evaluation.
if (std::optional<MSVCIntrin> MsvcIntId =
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index de68b07491c6c54..4678c527bfaab5a 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -572,6 +572,20 @@ unsigned char __readx18byte(unsigned long offset);
unsigned short __readx18word(unsigned long offset);
unsigned long __readx18dword(unsigned long offset);
unsigned __int64 __readx18qword(unsigned long offset);
+
+/* Bit-pattern copies between integer and floating-point values. */
+double _CopyDoubleFromInt64(__int64);
+float _CopyFloatFromInt32(__int32);
+__int32 _CopyInt32FromFloat(float);
+__int64 _CopyInt64FromDouble(double);
+
+/* Bit-counting intrinsics; every form returns its count as unsigned int. */
+unsigned int _CountLeadingOnes(unsigned long);
+unsigned int _CountLeadingOnes64(unsigned __int64);
+unsigned int _CountLeadingSigns(long);
+unsigned int _CountLeadingSigns64(__int64);
+unsigned int _CountLeadingZeros(unsigned long);
+unsigned int _CountLeadingZeros64(unsigned __int64);
+unsigned int _CountOneBits(unsigned long);
+unsigned int _CountOneBits64(unsigned __int64);
#endif
/*----------------------------------------------------------------------------*\
diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
index bd8e4cb27e52680..e6693f3a4a676f4 100644
--- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c
+++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c
@@ -265,5 +265,145 @@ unsigned __int64 check__readx18qword(unsigned LONG offset) {
// CHECK-MSCOMPAT: %[[RETVAL:.*]] = load i64, ptr %[[PTR]], align 1
// CHECK-MSCOMPAT: ret i64 %[[RETVAL]]
+// Calls _CopyDoubleFromInt64 on an __int64 argument and returns the result as
+// a double.
+// NOTE(review): the MS-compat expectations below pin the i64 operand being
+// stored directly into the double return slot; revisit them if the lowering of
+// this builtin changes.
+double check__CopyDoubleFromInt64(__int64 arg1) {
+ return _CopyDoubleFromInt64(arg1);
+}
+
+// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca double, align 8
+// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i64, align 8
+// CHECK-MSCOMPAT: store i64 %[[ARG]], ptr %[[ARG]].addr, align 8
+// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG]].addr, align 8
+// CHECK-MSCOMPAT: store i64 %[[VAR0]], ptr %[[RETVAL]], align 8
+// CHECK-MSCOMPAT: %[[VAR1:.*]] = load double, ptr %[[RETVAL]], align 8
+// CHECK-MSCOMPAT: ret double %[[VAR1]]
+// CHECK-LINUX: error: call to undeclared function '_CopyDoubleFromInt64'
+
+// Calls _CopyFloatFromInt32 on an __int32 argument and returns the result as
+// a float.
+// NOTE(review): the MS-compat expectations below pin the i32 operand being
+// stored directly into the float return slot; revisit them if the lowering of
+// this builtin changes.
+float check__CopyFloatFromInt32(__int32 arg1) {
+ return _CopyFloatFromInt32(arg1);
+}
+
+// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca float, align 4
+// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i32, align 4
+// CHECK-MSCOMPAT: store i32 %[[ARG]], ptr %[[ARG]].addr, align 4
+// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG]].addr, align 4
+// CHECK-MSCOMPAT: store i32 %[[VAR0]], ptr %[[RETVAL]], align 4
+// CHECK-MSCOMPAT: %[[VAR1:.*]] = load float, ptr %[[RETVAL]], align 4
+// CHECK-MSCOMPAT: ret float %[[VAR1]]
+// CHECK-LINUX: error: call to undeclared function '_CopyFloatFromInt32'
+
+// Calls _CopyInt32FromFloat on a float argument and returns the result as an
+// __int32.
+// NOTE(review): the MS-compat expectations below pin the float operand being
+// stored directly into the i32 return slot; revisit them if the lowering of
+// this builtin changes.
+__int32 check__CopyInt32FromFloat(float arg1) {
+ return _CopyInt32FromFloat(arg1);
+}
+
+// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i32, align 4
+// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca float, align 4
+// CHECK-MSCOMPAT: store float %[[ARG]], ptr %[[ARG]].addr, align 4
+// CHECK-MSCOMPAT: %[[VAR0:.*]] = load float, ptr %[[ARG]].addr, align 4
+// CHECK-MSCOMPAT: store float %[[VAR0]], ptr %[[RETVAL]], align 4
+// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i32, ptr %[[RETVAL]], align 4
+// CHECK-MSCOMPAT: ret i32 %[[VAR1]]
+// CHECK-LINUX: error: call to undeclared function '_CopyInt32FromFloat'
+
+// Calls _CopyInt64FromDouble on a double argument and returns the result as an
+// __int64.
+// NOTE(review): the MS-compat expectations below pin the double operand being
+// stored directly into the i64 return slot; revisit them if the lowering of
+// this builtin changes.
+__int64 check__CopyInt64FromDouble(double arg1) {
+ return _CopyInt64FromDouble(arg1);
+}
+
+// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i64, align 8
+// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca double, align 8
+// CHECK-MSCOMPAT: store double %[[ARG]], ptr %[[ARG]].addr, align 8
+// CHECK-MSCOMPAT: %[[VAR0:.*]] = load double, ptr %[[ARG]].addr, align 8
+// CHECK-MSCOMPAT: store double %[[VAR0]], ptr %[[RETVAL]], align 8
+// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i64, ptr %[[RETVAL]], align 8
+// CHECK-MSCOMPAT: ret i64 %[[VAR1]]
+// CHECK-LINUX: error: call to undeclared function '_CopyInt64FromDouble'
+
+// _CountLeadingOnes: expects the loaded i32 to be inverted (xor with -1) and
+// passed to llvm.ctlz.i32, whose result is returned directly.
+unsigned int check__CountLeadingOnes(unsigned LONG arg1) {
+  return _CountLeadingOnes(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4
+// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR1:.*]] = xor i32 %[[VAR0]], -1
+// CHECK-MSVC: %[[VAR2:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR1]], i1 false)
+// CHECK-MSVC: ret i32 %[[VAR2]]
+
+// _CountLeadingOnes64: expects the loaded i64 to be inverted (xor with -1),
+// passed to llvm.ctlz.i64, and the count truncated to i32.
+unsigned int check__CountLeadingOnes64(unsigned __int64 arg1) {
+  return _CountLeadingOnes64(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8
+// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR1:.*]] = xor i64 %[[VAR0]], -1
+// CHECK-MSVC: %[[VAR2:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR1]], i1 false)
+// CHECK-MSVC: %[[VAR3:.*]] = trunc i64 %[[VAR2]] to i32
+// CHECK-MSVC: ret i32 %[[VAR3]]
+
+// _CountLeadingSigns on an __int32 argument: expects one call to
+// llvm.aarch64.cls on the loaded i32, with its result returned directly.
+unsigned int check__CountLeadingSigns(__int32 arg1) {
+ return _CountLeadingSigns(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4
+// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls(i32 %[[VAR0]])
+// CHECK-MSVC: ret i32 %[[CLS]]
+
+// _CountLeadingSigns64 on an __int64 argument: expects one call to
+// llvm.aarch64.cls64 on the loaded i64; the call already yields an i32, which
+// is returned directly.
+unsigned int check__CountLeadingSigns64(__int64 arg1) {
+ return _CountLeadingSigns64(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8
+// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls64(i64 %[[VAR0]])
+// CHECK-MSVC: ret i32 %[[CLS]]
+
+// _CountLeadingZeros: expects the loaded i32 to be passed unmodified to
+// llvm.ctlz.i32, whose result is returned directly. Note the wrapper takes a
+// signed __int32 although the intrinsic is declared with an unsigned operand.
+unsigned int check__CountLeadingZeros(__int32 arg1) {
+ return _CountLeadingZeros(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4
+// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR1:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR0]], i1 false)
+// CHECK-MSVC: ret i32 %[[VAR1]]
+
+// _CountLeadingZeros64: expects the loaded i64 to be passed unmodified to
+// llvm.ctlz.i64 and the count truncated to i32. Note the wrapper takes a
+// signed __int64 although the intrinsic is declared with an unsigned operand.
+unsigned int check__CountLeadingZeros64(__int64 arg1) {
+ return _CountLeadingZeros64(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8
+// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR1:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR0]], i1 false)
+// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32
+// CHECK-MSVC: ret i32 %[[VAR2]]
+
+// _CountOneBits: expects the loaded i32 to be passed to llvm.ctpop.i32, whose
+// result is returned directly.
+unsigned int check_CountOneBits(unsigned LONG arg1) {
+  return _CountOneBits(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4
+// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4
+// CHECK-MSVC: %[[VAR1:.*]] = call i32 @llvm.ctpop.i32(i32 %[[VAR0]])
+// CHECK-MSVC: ret i32 %[[VAR1]]
+
+// _CountOneBits64: expects the loaded i64 to be passed to llvm.ctpop.i64 and
+// the count truncated to i32.
+unsigned int check_CountOneBits64(unsigned __int64 arg1) {
+  return _CountOneBits64(arg1);
+}
+
+// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8
+// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8
+// CHECK-MSVC: %[[VAR1:.*]] = call i64 @llvm.ctpop.i64(i64 %[[VAR0]])
+// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32
+// CHECK-MSVC: ret i32 %[[VAR2]]
+
+
// CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}
// CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"}
+
+
|
bbd5386
to
2f18bc9
Compare
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
BuiltinID == AArch64::BI_CopyFloatFromInt32 || | ||
BuiltinID == AArch64::BI_CopyInt32FromFloat || | ||
BuiltinID == AArch64::BI_CopyInt64FromDouble) { | ||
return EmitScalarExpr(E->getArg(0)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I suspect this won't actually work correctly in general (for example, if you try to do arithmetic on the returned value); the returned value has the wrong type.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, ok -- added an int-to-float cast. It seems like MSVC only emits an fmov
for this code, so for an example snippet
double copy_double_from_int64(__int64 w) {
return _CopyDoubleFromInt64((__int64) w);
}
MSVC assembly output looks like
sub sp,sp,#0x10
str x0,[sp]
ldr x8,[sp]
fmov d0,x8
add sp,sp,#0x10
ret
whereas clang assembly output (with the cast) looks like
sub sp, sp, #16
str x0, [sp, #8]
ldr d0, [sp, #8]
scvtf d0, d0
add sp, sp, #16
ret
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I meant a bitcast, not an sitofp.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done
2f18bc9
to
8a971b0
Compare
54d606a
to
b3b8c7c
Compare
b3b8c7c
to
a379f19
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Implement the _Count* and _Copy* Windows ARM intrinsics:
Full list of intrinsics here: https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics
Bug: 65405