From de9c8618210103074e580105ce894e66dba782a9 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Thu, 7 Sep 2023 11:32:17 -0700 Subject: [PATCH 1/3] [MSVC, ARM64] Add _Copy* and _Count* intrinsics --- clang/include/clang/Basic/BuiltinsAArch64.def | 15 +- clang/lib/CodeGen/CGBuiltin.cpp | 62 ++++++++ clang/lib/Headers/intrin.h | 14 ++ .../test/CodeGen/arm64-microsoft-intrinsics.c | 138 ++++++++++++++++++ 4 files changed, 228 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index eaae6c9ad84686..c4d34f8e15e8b8 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -259,7 +259,6 @@ TARGET_HEADER_BUILTIN(__umulh, "ULLiULLiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, TARGET_HEADER_BUILTIN(__break, "vi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") - TARGET_HEADER_BUILTIN(__writex18byte, "vUNiUc", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__writex18word, "vUNiUs", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__writex18dword, "vUNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") @@ -270,6 +269,20 @@ TARGET_HEADER_BUILTIN(__readx18word, "UsUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, TARGET_HEADER_BUILTIN(__readx18dword, "UNiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(__readx18qword, "ULLiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CopyDoubleFromInt64, "dSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CopyFloatFromInt32, "fSi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CopyInt32FromFloat, "Sif", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CopyInt64FromDouble, "SLLid", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") + +TARGET_HEADER_BUILTIN(_CountLeadingOnes, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingOnes64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") 
+TARGET_HEADER_BUILTIN(_CountLeadingSigns, "UiSLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingSigns64, "UiSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingZeros, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingZeros64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountOneBits, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") + #undef BUILTIN #undef LANGBUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f727a0d5592eff..e27e74ed7a53db 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10779,6 +10779,68 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Load; } + if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || + BuiltinID == AArch64::BI_CopyFloatFromInt32 || + BuiltinID == AArch64::BI_CopyInt32FromFloat || + BuiltinID == AArch64::BI_CopyInt64FromDouble) { + return EmitScalarExpr(E->getArg(0)); + } + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64 || + BuiltinID == AArch64::BI_CountLeadingZeros || + BuiltinID == AArch64::BI_CountLeadingZeros64) { + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Arg->getType(); + + if (BuiltinID == AArch64::BI_CountLeadingOnes || + BuiltinID == AArch64::BI_CountLeadingOnes64) + Arg = Builder.CreateXor(Arg, Constant::getAllOnesValue(ArgType)); + + Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType); + Value *Result = Builder.CreateCall(F, {Arg, Builder.getInt1(false)}); + + if (BuiltinID == AArch64::BI_CountLeadingOnes64 || + BuiltinID == AArch64::BI_CountLeadingZeros64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + + if (BuiltinID == AArch64::BI_CountLeadingSigns || + BuiltinID == 
AArch64::BI_CountLeadingSigns64) { + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = Arg->getType(); + + Function *F; + if (BuiltinID == AArch64::BI_CountLeadingSigns) { + F = CGM.getIntrinsic(Intrinsic::aarch64_cls); + if (ArgType != Builder.getInt32Ty()) + Arg = + Builder.CreateIntCast(Arg, Builder.getInt32Ty(), /*isSigned*/ true); + } else { + F = CGM.getIntrinsic(Intrinsic::aarch64_cls64); + if (ArgType != Builder.getInt64Ty()) + Arg = + Builder.CreateIntCast(Arg, Builder.getInt64Ty(), /*isSigned*/ true); + } + Value *Result = Builder.CreateCall(F, Arg, "cls"); + if (BuiltinID == AArch64::BI_CountLeadingSigns64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + + if (BuiltinID == AArch64::BI_CountOneBits || + BuiltinID == AArch64::BI_CountOneBits64) { + Value *ArgValue = EmitScalarExpr(E->getArg(0)); + llvm::Type *ArgType = ArgValue->getType(); + Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType); + + Value *Result = Builder.CreateCall(F, ArgValue); + if (BuiltinID == AArch64::BI_CountOneBits64) + Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); + return Result; + } + // Handle MSVC intrinsics before argument evaluation to prevent double // evaluation. 
if (std::optional MsvcIntId = diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h index de68b07491c6c5..4678c527bfaab5 100644 --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -572,6 +572,20 @@ unsigned char __readx18byte(unsigned long offset); unsigned short __readx18word(unsigned long offset); unsigned long __readx18dword(unsigned long offset); unsigned __int64 __readx18qword(unsigned long offset); + +double _CopyDoubleFromInt64(__int64); +float _CopyFloatFromInt32(__int32); +__int32 _CopyInt32FromFloat(float); +__int64 _CopyInt64FromDouble(double); + +unsigned int _CountLeadingOnes(unsigned long); +unsigned int _CountLeadingOnes64(unsigned __int64); +unsigned int _CountLeadingSigns(long); +unsigned int _CountLeadingSigns64(__int64); +unsigned int _CountLeadingZeros(unsigned long); +unsigned int _CountLeadingZeros64(unsigned __int64); +unsigned int _CountOneBits(unsigned long); +unsigned int _CountOneBits64(unsigned __int64); #endif /*----------------------------------------------------------------------------*\ diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index bd8e4cb27e5268..7e35504e2a4adb 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -265,5 +265,143 @@ unsigned __int64 check__readx18qword(unsigned LONG offset) { // CHECK-MSCOMPAT: %[[RETVAL:.*]] = load i64, ptr %[[PTR]], align 1 // CHECK-MSCOMPAT: ret i64 %[[RETVAL]] +double check__CopyDoubleFromInt64(__int64 arg1) { + return _CopyDoubleFromInt64(arg1); +} + +// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca double, align 8 +// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG]], ptr %[[ARG]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG]].addr, align 8 +// CHECK-MSCOMPAT: store i64 %[[VAR0]], ptr %[[RETVAL]], align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = load double, ptr %[[RETVAL]],
align 8 +// CHECK-MSCOMPAT: ret double %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CopyDoubleFromInt64' + +float check__CopyFloatFromInt32(__int32 arg1) { + return _CopyFloatFromInt32(arg1); +} + +// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca float, align 4 +// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG]], ptr %[[ARG]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG]].addr, align 4 +// CHECK-MSCOMPAT: store i32 %[[VAR0]], ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = load float, ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: ret float %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CopyFloatFromInt32' + +__int32 check__CopyInt32FromFloat(float arg1) { + return _CopyInt32FromFloat(arg1); +} + +// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i32, align 4 +// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca float, align 4 +// CHECK-MSCOMPAT: store float %[[ARG]], ptr %[[ARG]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load float, ptr %[[ARG]].addr, align 4 +// CHECK-MSCOMPAT: store float %[[VAR0]], ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i32, ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: ret i32 %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CopyInt32FromFloat' + +__int64 check__CopyInt64FromDouble(double arg1) { + return _CopyInt64FromDouble(arg1); +} + +// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i64, align 8 +// CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca double, align 8 +// CHECK-MSCOMPAT: store double %[[ARG]], ptr %[[ARG]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load double, ptr %[[ARG]].addr, align 8 +// CHECK-MSCOMPAT: store double %[[VAR0]], ptr %[[RETVAL]], align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i64, ptr %[[RETVAL]], align 8 +// CHECK-MSCOMPAT: ret i64 %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CopyInt64FromDouble' + +unsigned int check__CountLeadingOnes(unsigned LONG arg1) 
{ + return _CountLeadingOnes(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR1:.*]] = xor i32 %[[VAR0]], -1 +// CHECK-MSVC: %[[VAR2:.*]] = call i32 @llvm.ctlz.i32(i32 %1, i1 false) +// CHECK-MSVC: ret i32 %[[VAR2]] + +unsigned int check__CountLeadingOnes64(unsigned __int64 arg1) { + return _CountLeadingOnes64(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR1:.*]] = xor i64 %[[VAR0]], -1 +// CHECK-MSVC: %[[VAR2:.*]] = call i64 @llvm.ctlz.i64(i64 %1, i1 false) +// CHECK-MSVC: %[[VAR3:.*]] = trunc i64 %2 to i32 +// CHECK-MSVC: ret i32 %[[VAR3]] + +unsigned int check__CountLeadingSigns(__int32 arg1) { + return _CountLeadingSigns(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls(i32 %[[VAR0]]) +// CHECK-MSVC: ret i32 %[[CLS]] + +unsigned int check__CountLeadingSigns64(__int64 arg1) { + return _CountLeadingSigns64(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls64(i64 %[[VAR0]]) +// CHECK-MSVC: ret i32 %[[CLS]] + +unsigned int check__CountLeadingZeros(__int32 arg1) { + return _CountLeadingZeros(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: 
%[[VAR1:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR0]], i1 false) +// CHECK-MSVC: ret i32 %[[VAR1]] + +unsigned int check__CountLeadingZeros64(__int64 arg1) { + return _CountLeadingZeros64(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR1:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR0]], i1 false) +// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32 +// CHECK-MSVC: ret i32 %[[VAR2]] + +unsigned int check_CountOneBits(unsigned LONG arg1) { + return _CountOneBits(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSVC: %[[VAR1:.*]] = call i32 @llvm.ctpop.i32(i32 %0) +// CHECK-MSVC: ret i32 %[[VAR1]] + +unsigned int check_CountOneBits64(unsigned __int64 arg1) { + return _CountOneBits64(arg1); +} + +// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSVC: %[[VAR1:.*]] = call i64 @llvm.ctpop.i64(i64 %0) +// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %1 to i32 +// CHECK-MSVC: ret i32 %[[VAR2]] + + // CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"} // CHECK-MSCOMPAT: ![[MD3]] = !{!"sp"} From a60608a55a29ac1f119387ec181e9b4b942b0a26 Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Mon, 18 Sep 2023 15:42:37 -0700 Subject: [PATCH 2/3] Add cast to _Copy* intrinsics --- clang/lib/CodeGen/CGBuiltin.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index e27e74ed7a53db..5bd13553514d34 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10783,7 +10783,15 @@ Value 
*CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID == AArch64::BI_CopyFloatFromInt32 || BuiltinID == AArch64::BI_CopyInt32FromFloat || BuiltinID == AArch64::BI_CopyInt64FromDouble) { - return EmitScalarExpr(E->getArg(0)); + Value *Arg = EmitScalarExpr(E->getArg(0)); + llvm::Type *RetTy = ConvertType(E->getType()); + + if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || + BuiltinID == AArch64::BI_CopyFloatFromInt32) + Arg = Builder.CreateSIToFP(Arg, RetTy); + else + Arg = Builder.CreateFPToSI(Arg, RetTy); + return Arg; } if (BuiltinID == AArch64::BI_CountLeadingOnes || From a379f194f0924a3e08e4add8a5ba60aad7e8e9ac Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Tue, 19 Sep 2023 12:58:18 -0700 Subject: [PATCH 3/3] Bitcast in _Copy functions and update test --- clang/include/clang/Basic/BuiltinsAArch64.def | 8 +- clang/lib/CodeGen/CGBuiltin.cpp | 25 +--- .../test/CodeGen/arm64-microsoft-intrinsics.c | 114 +++++++++--------- 3 files changed, 66 insertions(+), 81 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def index c4d34f8e15e8b8..12c7a371e0fbdb 100644 --- a/clang/include/clang/Basic/BuiltinsAArch64.def +++ b/clang/include/clang/Basic/BuiltinsAArch64.def @@ -274,13 +274,13 @@ TARGET_HEADER_BUILTIN(_CopyFloatFromInt32, "fSi", "nh", INTRIN_H, ALL_MS_LANGUAG TARGET_HEADER_BUILTIN(_CopyInt32FromFloat, "Sif", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_CopyInt64FromDouble, "SLLid", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_CountLeadingOnes, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingOnes, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_CountLeadingOnes64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_CountLeadingSigns, "UiSLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingSigns, "UiSNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") 
TARGET_HEADER_BUILTIN(_CountLeadingSigns64, "UiSLLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_CountLeadingZeros, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountLeadingZeros, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_CountLeadingZeros64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") -TARGET_HEADER_BUILTIN(_CountOneBits, "UiULi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") +TARGET_HEADER_BUILTIN(_CountOneBits, "UiUNi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") TARGET_HEADER_BUILTIN(_CountOneBits64, "UiULLi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "") #undef BUILTIN diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 5bd13553514d34..4a73403cb3b9a7 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10785,13 +10785,7 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, BuiltinID == AArch64::BI_CopyInt64FromDouble) { Value *Arg = EmitScalarExpr(E->getArg(0)); llvm::Type *RetTy = ConvertType(E->getType()); - - if (BuiltinID == AArch64::BI_CopyDoubleFromInt64 || - BuiltinID == AArch64::BI_CopyFloatFromInt32) - Arg = Builder.CreateSIToFP(Arg, RetTy); - else - Arg = Builder.CreateFPToSI(Arg, RetTy); - return Arg; + return Builder.CreateBitCast(Arg, RetTy); } if (BuiltinID == AArch64::BI_CountLeadingOnes || @@ -10817,20 +10811,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, if (BuiltinID == AArch64::BI_CountLeadingSigns || BuiltinID == AArch64::BI_CountLeadingSigns64) { Value *Arg = EmitScalarExpr(E->getArg(0)); - llvm::Type *ArgType = Arg->getType(); - Function *F; - if (BuiltinID == AArch64::BI_CountLeadingSigns) { - F = CGM.getIntrinsic(Intrinsic::aarch64_cls); - if (ArgType != Builder.getInt32Ty()) - Arg = - Builder.CreateIntCast(Arg, Builder.getInt32Ty(), /*isSigned*/ true); - } else { - F = CGM.getIntrinsic(Intrinsic::aarch64_cls64); - if (ArgType != Builder.getInt64Ty()) - Arg = - 
Builder.CreateIntCast(Arg, Builder.getInt64Ty(), /*isSigned*/ true); - } + Function *F = (BuiltinID == AArch64::BI_CountLeadingSigns) + ? CGM.getIntrinsic(Intrinsic::aarch64_cls) + : CGM.getIntrinsic(Intrinsic::aarch64_cls64); + Value *Result = Builder.CreateCall(F, Arg, "cls"); if (BuiltinID == AArch64::BI_CountLeadingSigns64) Result = Builder.CreateTrunc(Result, Builder.getInt32Ty()); diff --git a/clang/test/CodeGen/arm64-microsoft-intrinsics.c b/clang/test/CodeGen/arm64-microsoft-intrinsics.c index 7e35504e2a4adb..b15defb0894e92 100644 --- a/clang/test/CodeGen/arm64-microsoft-intrinsics.c +++ b/clang/test/CodeGen/arm64-microsoft-intrinsics.c @@ -269,12 +269,10 @@ double check__CopyDoubleFromInt64(__int64 arg1) { return _CopyDoubleFromInt64(arg1); } -// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca double, align 8 // CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i64, align 8 // CHECK-MSCOMPAT: store i64 %[[ARG]], ptr %[[ARG]].addr, align 8 // CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG]].addr, align 8 -// CHECK-MSCOMPAT: store i64 %[[VAR0]], ptr %[[RETVAL]], align 8 -// CHECK-MSCOMPAT: %[[VAR1:.*]] = load double, ptr %[[RETVAL]], align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast i64 %[[VAR0]] to double // CHECK-MSCOMPAT: ret double %[[VAR1]] // CHECK-LINUX: error: call to undeclared function '_CopyDoubleFromInt64' @@ -282,12 +280,10 @@ float check__CopyFloatFromInt32(__int32 arg1) { return _CopyFloatFromInt32(arg1); } -// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca float, align 4 // CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca i32, align 4 // CHECK-MSCOMPAT: store i32 %[[ARG]], ptr %[[ARG]].addr, align 4 // CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG]].addr, align 4 -// CHECK-MSCOMPAT: store i32 %[[VAR0]], ptr %[[RETVAL]], align 4 -// CHECK-MSCOMPAT: %[[VAR1:.*]] = load float, ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast i32 %[[VAR0]] to float // CHECK-MSCOMPAT: ret float %[[VAR1]] // CHECK-LINUX: error: call to undeclared function 
'_CopyFloatFromInt32' @@ -295,12 +291,10 @@ __int32 check__CopyInt32FromFloat(float arg1) { return _CopyInt32FromFloat(arg1); } -// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i32, align 4 // CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca float, align 4 // CHECK-MSCOMPAT: store float %[[ARG]], ptr %[[ARG]].addr, align 4 // CHECK-MSCOMPAT: %[[VAR0:.*]] = load float, ptr %[[ARG]].addr, align 4 -// CHECK-MSCOMPAT: store float %[[VAR0]], ptr %[[RETVAL]], align 4 -// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i32, ptr %[[RETVAL]], align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast float %[[VAR0]] to i32 // CHECK-MSCOMPAT: ret i32 %[[VAR1]] // CHECK-LINUX: error: call to undeclared function '_CopyInt32FromFloat' @@ -308,12 +302,10 @@ __int64 check__CopyInt64FromDouble(double arg1) { return _CopyInt64FromDouble(arg1); } -// CHECK-MSCOMPAT: %[[RETVAL:.*]] = alloca i64, align 8 // CHECK-MSCOMPAT: %[[ARG:.*]].addr = alloca double, align 8 // CHECK-MSCOMPAT: store double %[[ARG]], ptr %[[ARG]].addr, align 8 // CHECK-MSCOMPAT: %[[VAR0:.*]] = load double, ptr %[[ARG]].addr, align 8 -// CHECK-MSCOMPAT: store double %[[VAR0]], ptr %[[RETVAL]], align 8 -// CHECK-MSCOMPAT: %[[VAR1:.*]] = load i64, ptr %[[RETVAL]], align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = bitcast double %[[VAR0]] to i64 // CHECK-MSCOMPAT: ret i64 %[[VAR1]] // CHECK-LINUX: error: call to undeclared function '_CopyInt64FromDouble' @@ -321,86 +313,94 @@ unsigned int check__CountLeadingOnes(unsigned LONG arg1) { return _CountLeadingOnes(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 -// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR1:.*]] = xor i32 %[[VAR0]], -1 -// CHECK-MSVC: %[[VAR2:.*]] = call i32 @llvm.ctlz.i32(i32 %1, i1 false) -// CHECK-MSVC: ret i32 %[[VAR2]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// 
CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = xor i32 %[[VAR0]], -1 +// CHECK-MSCOMPAT: %[[VAR2:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR1]], i1 false) +// CHECK-MSCOMPAT: ret i32 %[[VAR2]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingOnes' unsigned int check__CountLeadingOnes64(unsigned __int64 arg1) { return _CountLeadingOnes64(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 -// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR1:.*]] = xor i64 %[[VAR0]], -1 -// CHECK-MSVC: %[[VAR2:.*]] = call i64 @llvm.ctlz.i64(i64 %1, i1 false) -// CHECK-MSVC: %[[VAR3:.*]] = trunc i64 %2 to i32 -// CHECK-MSVC: ret i32 %[[VAR3]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = xor i64 %[[VAR0]], -1 +// CHECK-MSCOMPAT: %[[VAR2:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR1]], i1 false) +// CHECK-MSCOMPAT: %[[VAR3:.*]] = trunc i64 %[[VAR2]] to i32 +// CHECK-MSCOMPAT: ret i32 %[[VAR3]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingOnes64' unsigned int check__CountLeadingSigns(__int32 arg1) { return _CountLeadingSigns(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 -// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls(i32 %[[VAR0]]) -// CHECK-MSVC: ret i32 %[[CLS]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[CLS:.*]] = call i32 @llvm.aarch64.cls(i32 %[[VAR0]]) +// CHECK-MSCOMPAT:
ret i32 %[[CLS]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingSigns' unsigned int check__CountLeadingSigns64(__int64 arg1) { return _CountLeadingSigns64(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 -// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[CLS:.*]] = call i32 @llvm.aarch64.cls64(i64 %[[VAR0]]) -// CHECK-MSVC: ret i32 %[[CLS]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[CLS:.*]] = call i32 @llvm.aarch64.cls64(i64 %[[VAR0]]) +// CHECK-MSCOMPAT: ret i32 %[[CLS]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingSigns64' unsigned int check__CountLeadingZeros(__int32 arg1) { return _CountLeadingZeros(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 -// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR1:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR0]], i1 false) -// CHECK-MSVC: ret i32 %[[VAR1]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i32 @llvm.ctlz.i32(i32 %[[VAR0]], i1 false) +// CHECK-MSCOMPAT: ret i32 %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingZeros' unsigned int check__CountLeadingZeros64(__int64 arg1) { return _CountLeadingZeros64(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 -// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR1:.*]] = call i64 
@llvm.ctlz.i64(i64 %[[VAR0]], i1 false) -// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32 -// CHECK-MSVC: ret i32 %[[VAR2]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i64 @llvm.ctlz.i64(i64 %[[VAR0]], i1 false) +// CHECK-MSCOMPAT: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32 +// CHECK-MSCOMPAT: ret i32 %[[VAR2]] +// CHECK-LINUX: error: call to undeclared function '_CountLeadingZeros64' unsigned int check_CountOneBits(unsigned LONG arg1) { return _CountOneBits(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i32, align 4 -// CHECK-MSVC: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 -// CHECK-MSVC: %[[VAR1:.*]] = call i32 @llvm.ctpop.i32(i32 %0) -// CHECK-MSVC: ret i32 %[[VAR1]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i32, align 4 +// CHECK-MSCOMPAT: store i32 %[[ARG1]], ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i32, ptr %[[ARG1]].addr, align 4 +// CHECK-MSCOMPAT: %[[VAR1:.*]] = call i32 @llvm.ctpop.i32(i32 %[[VAR0]]) +// CHECK-MSCOMPAT: ret i32 %[[VAR1]] +// CHECK-LINUX: error: call to undeclared function '_CountOneBits' unsigned int check_CountOneBits64(unsigned __int64 arg1) { return _CountOneBits64(arg1); } -// CHECK-MSVC: %[[ARG1:.*]].addr = alloca i64, align 8 -// CHECK-MSVC: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 -// CHECK-MSVC: %[[VAR1:.*]] = call i64 @llvm.ctpop.i64(i64 %0) -// CHECK-MSVC: %[[VAR2:.*]] = trunc i64 %1 to i32 -// CHECK-MSVC: ret i32 %[[VAR2]] +// CHECK-MSCOMPAT: %[[ARG1:.*]].addr = alloca i64, align 8 +// CHECK-MSCOMPAT: store i64 %[[ARG1]], ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT: %[[VAR0:.*]] = load i64, ptr %[[ARG1]].addr, align 8 +// CHECK-MSCOMPAT:
%[[VAR1:.*]] = call i64 @llvm.ctpop.i64(i64 %[[VAR0]]) +// CHECK-MSCOMPAT: %[[VAR2:.*]] = trunc i64 %[[VAR1]] to i32 +// CHECK-MSCOMPAT: ret i32 %[[VAR2]] +// CHECK-LINUX: error: call to undeclared function '_CountOneBits64' // CHECK-MSCOMPAT: ![[MD2]] = !{!"x18"}