From ef39145218f584bfda7fc44f9078be02272b9e89 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Fri, 8 Nov 2024 09:45:09 +0800
Subject: [PATCH] [CIR] [Lowering] [X86_64] Support VAArg in shape

---
 clang/include/clang/CIR/ABIArgInfo.h          |   2 +
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |  11 +
 clang/include/clang/CIR/Dialect/IR/CIROps.td  |  20 +
 .../Dialect/Transforms/LoweringPrepare.cpp    |   9 +-
 .../Transforms/LoweringPrepareCXXABI.h        |   1 +
 .../Transforms/LoweringPrepareX86ABI.h        |   0
 .../Transforms/TargetLowering/ABIInfoImpl.cpp |  12 +
 .../Transforms/TargetLowering/ABIInfoImpl.h   |   3 +
 .../Transforms/TargetLowering/CIRCXXABI.h     |  19 -
 .../Transforms/TargetLowering/CMakeLists.txt  |   1 +
 .../TargetLowering/ItaniumCXXABI.cpp          |   1 +
 .../Targets/LoweringPrepareX86CXXABI.cpp      | 357 ++++++++++++++++++
 .../Transforms/TargetLowering/Targets/X86.cpp |  92 +----
 .../TargetLowering/Targets/X86_64ABIInfo.h    |  96 +++++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  34 +-
 clang/test/CIR/Lowering/var-arg-x86_64.c      |  78 ++++
 16 files changed, 624 insertions(+), 112 deletions(-)
 create mode 100644 clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h
 create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
 create mode 100644 clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h
 create mode 100644 clang/test/CIR/Lowering/var-arg-x86_64.c

diff --git a/clang/include/clang/CIR/ABIArgInfo.h b/clang/include/clang/CIR/ABIArgInfo.h
index b3c3d68b9572..28215e7ba196 100644
--- a/clang/include/clang/CIR/ABIArgInfo.h
+++ b/clang/include/clang/CIR/ABIArgInfo.h
@@ -252,6 +252,8 @@ class ABIArgInfo {
   bool isExpand() const { return TheKind == Expand; }
   bool isCoerceAndExpand() const { return TheKind == CoerceAndExpand; }

+  bool isIgnore() const { return TheKind == Ignore; }
+
   bool isSignExt() const {
     assert(isExtend() && "Invalid kind!");
     return SignExt;
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index f1275a472f3c..225fa444e340 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -48,6 +48,17 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return create<cir::ConstantOp>(loc, ty, getAttr<cir::IntAttr>(ty, val));
   }

+  mlir::Value getSignedInt(mlir::Location loc, int64_t val, unsigned numBits) {
+    return getConstAPSInt(
+        loc, llvm::APSInt(llvm::APInt(numBits, val), /*isUnsigned=*/false));
+  }
+
+  mlir::Value getUnsignedInt(mlir::Location loc, uint64_t val,
+                             unsigned numBits) {
+    return getConstAPSInt(
+        loc, llvm::APSInt(llvm::APInt(numBits, val), /*isUnsigned=*/true));
+  }
+
   mlir::Value getConstAPInt(mlir::Location loc, mlir::Type typ,
                             const llvm::APInt &val) {
     return create<cir::ConstantOp>(loc, typ, getAttr<cir::IntAttr>(typ, val));
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index b01e9eb55517..8e43713b8fe4 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -4485,6 +4485,26 @@ def AssumeSepStorageOp : CIR_Op<"assume.separate_storage", [SameTypeOperands]> {
   }];
 }

+//===----------------------------------------------------------------------===//
+// PtrMask Operations
+//===----------------------------------------------------------------------===//
+
+def PtrMaskOp : CIR_Op<"ptr_mask", [AllTypesMatch<["ptr", "result"]>]> {
+  let summary = "Masks out bits of the pointer according to a mask";
+  let description = [{
+    The `cir.ptr_mask` operation takes a pointer and an integer `mask` as its
+    arguments and returns a pointer whose value equals the bitwise AND of the
+    original pointer value and the `mask`.
+  }];
+
+  let arguments = (ins CIR_PointerType:$ptr,
+                       CIR_IntType:$mask);
+  let results = (outs CIR_PointerType:$result);
+
+  let assemblyFormat = [{
+    `(` $ptr `,` $mask `:` type($mask) `)` `:` qualified(type($result)) attr-dict
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // Branch Probability Operations
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
index 031c3b3b4b40..6af33fd551f2 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepare.cpp
@@ -124,9 +124,16 @@ struct LoweringPreparePass : public LoweringPrepareBase<LoweringPreparePass> {
   void setASTContext(clang::ASTContext *c) {
     astCtx = c;
-    auto abiStr = c->getTargetInfo().getABI();
+    const clang::TargetInfo &target = c->getTargetInfo();
+    auto abiStr = target.getABI();
     switch (c->getCXXABIKind()) {
     case clang::TargetCXXABI::GenericItanium:
+      if (target.getTriple().getArch() == llvm::Triple::x86_64) {
+        cxxABI.reset(
+            cir::LoweringPrepareCXXABI::createX86ABI(/*is64bit=*/true));
+        break;
+      }
+
       cxxABI.reset(cir::LoweringPrepareCXXABI::createItaniumABI());
       break;
     case clang::TargetCXXABI::GenericAArch64:
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
index 47c63fae7d7b..f3ae48c13574 100644
--- a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
+++ b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareCXXABI.h
@@ -28,6 +28,7 @@ class LoweringPrepareCXXABI {
 public:
   static LoweringPrepareCXXABI *createItaniumABI();
   static LoweringPrepareCXXABI *createAArch64ABI(cir::AArch64ABIKind k);
+  static LoweringPrepareCXXABI *createX86ABI(bool is64Bit);

   virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder, cir::VAArgOp op,
                                  const cir::CIRDataLayout &datalayout) = 0;
diff --git a/clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h b/clang/lib/CIR/Dialect/Transforms/LoweringPrepareX86ABI.h
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
index 2c92be20bd41..e07315d54a38 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.cpp
@@ -36,6 +36,18 @@ bool isAggregateTypeForABI(mlir::Type T) {
   return !LowerFunction::hasScalarEvaluationKind(T);
 }

+mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
+                                          mlir::Value ptr, unsigned alignment) {
+  // OverflowArgArea = (OverflowArgArea + Align - 1) & -Align;
+  mlir::Location loc = ptr.getLoc();
+  mlir::Value roundUp = builder.createPtrStride(
+      loc, builder.createPtrBitcast(ptr, builder.getUIntNTy(8)),
+      builder.getUnsignedInt(loc, alignment - 1, /*width=*/32));
+  return builder.create<cir::PtrMaskOp>(
+      loc, roundUp.getType(), roundUp,
+      builder.getSignedInt(loc, -alignment, /*width=*/32));
+}
+
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty) {
   if (auto RT = mlir::dyn_cast<StructType>(Ty)) {
     if (RT.isUnion())
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
index df1cd2d0fe0d..8005b153a544 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ABIInfoImpl.h
@@ -25,6 +25,9 @@ bool classifyReturnType(const CIRCXXABI &CXXABI, LowerFunctionInfo &FI,

 bool isAggregateTypeForABI(mlir::Type T);

+mlir::Value emitRoundPointerUpToAlignment(cir::CIRBaseBuilderTy &builder,
+                                          mlir::Value ptr, unsigned alignment);
+
 /// Pass transparent unions as if they were the type of the first element. Sema
 /// should ensure that all elements of the union have the same "machine type".
 mlir::Type useFirstFieldIfTransparentUnion(mlir::Type Ty);
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
index a980f76f012d..0f05ec8040f8 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRCXXABI.h
@@ -66,23 +66,4 @@ CIRCXXABI *CreateItaniumCXXABI(LowerModule &CGM);

 } // namespace cir

-// FIXME(cir): Merge this into the CIRCXXABI class above. To do so, this code
-// should be updated to follow some level of codegen parity.
-namespace cir {
-
-class LoweringPrepareCXXABI {
-public:
-  static LoweringPrepareCXXABI *createItaniumABI();
-  static LoweringPrepareCXXABI *createAArch64ABI(cir::AArch64ABIKind k);
-
-  virtual mlir::Value lowerVAArg(CIRBaseBuilderTy &builder, cir::VAArgOp op,
-                                 const cir::CIRDataLayout &datalayout) = 0;
-  virtual ~LoweringPrepareCXXABI() {}
-
-  virtual mlir::Value lowerDynamicCast(CIRBaseBuilderTy &builder,
-                                       clang::ASTContext &astCtx,
-                                       cir::DynamicCastOp op) = 0;
-};
-} // namespace cir
-
 #endif // LLVM_CLANG_LIB_CIR_DIALECT_TRANSFORMS_TARGETLOWERING_CIRCXXABI_H
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
index 218656c3b144..d3cb9fc96f1a 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CMakeLists.txt
@@ -17,6 +17,7 @@ add_clang_library(TargetLowering
   Targets/X86.cpp
   Targets/LoweringPrepareAArch64CXXABI.cpp
   Targets/LoweringPrepareItaniumCXXABI.cpp
+  Targets/LoweringPrepareX86CXXABI.cpp

 DEPENDS
   clangBasic
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
index deb4053dc682..081db25808d1 100644
--- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/ItaniumCXXABI.cpp
@@ -20,6 +20,7 @@
 //
 //===----------------------------------------------------------------------===//

+#include "../LoweringPrepareCXXABI.h"
 #include "CIRCXXABI.h"
 #include "LowerModule.h"
 #include "llvm/Support/ErrorHandling.h"
diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
new file mode 100644
index 000000000000..ba376d26b0fc
--- /dev/null
+++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/LoweringPrepareX86CXXABI.cpp
@@ -0,0 +1,357 @@
+//====- LoweringPrepareX86CXXABI.cpp - X86 ABI specific code ----------====//
+//
+// Part of the LLVM Project,
+// under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===------------------------------------------------------------------===//
+//
+// This file provides X86{_64, _32} C++ ABI specific code that is used during
+// LLVMIR lowering prepare.
+//
+//===------------------------------------------------------------------===//
+
+#include "../LowerModule.h"
+#include "../LoweringPrepareItaniumCXXABI.h"
+#include "ABIInfoImpl.h"
+#include "X86_64ABIInfo.h"
+
+using namespace clang;
+using namespace cir;
+
+namespace {
+class LoweringPrepareX86CXXABI : public LoweringPrepareItaniumCXXABI {
+  bool is64;
+
+public:
+  LoweringPrepareX86CXXABI(bool is64) : is64(is64) {}
+  mlir::Value lowerVAArg(cir::CIRBaseBuilderTy &builder, cir::VAArgOp op,
+                         const cir::CIRDataLayout &datalayout) override {
+    if (is64)
+      return lowerVAArgX86_64(builder, op, datalayout);
+
+    return lowerVAArgX86_32(builder, op, datalayout);
+  }
+
+  mlir::Value lowerVAArgX86_64(cir::CIRBaseBuilderTy &builder, cir::VAArgOp op,
+                               const cir::CIRDataLayout &datalayout);
+  mlir::Value lowerVAArgX86_32(cir::CIRBaseBuilderTy &builder, cir::VAArgOp op,
+                               const cir::CIRDataLayout &datalayout) {
+    llvm_unreachable("lowerVAArg for X86_32 not implemented yet");
+  }
+};
+
+std::unique_ptr<cir::LowerModule> getLowerModule(cir::VAArgOp op) {
+  mlir::ModuleOp mo = op->getParentOfType<mlir::ModuleOp>();
+  if (!mo)
+    return nullptr;
+
+  mlir::PatternRewriter rewriter(mo.getContext());
+  return cir::createLowerModule(mo, rewriter);
+}
+
+mlir::Value buildX86_64VAArgFromMemory(cir::CIRBaseBuilderTy &builder,
+                                       const cir::CIRDataLayout &datalayout,
+                                       mlir::Value valist, mlir::Type Ty,
+                                       mlir::Location loc) {
+  mlir::Value overflow_arg_area_p =
+      builder.createGetMemberOp(loc, valist, "overflow_arg_area", 2);
+  mlir::Value overflow_arg_area = builder.createLoad(loc, overflow_arg_area_p);
+
+  // AMD64-ABI 3.5.7p5: Step 7. Align l->overflow_arg_area upwards to a 16
+  // byte boundary if alignment needed by type exceeds 8 byte boundary.
+  // It isn't stated explicitly in the standard, but in practice we use
+  // alignment greater than 16 where necessary.
+  unsigned alignment = datalayout.getABITypeAlign(Ty).value();
+  if (alignment > 8)
+    overflow_arg_area =
+        emitRoundPointerUpToAlignment(builder, overflow_arg_area, alignment);
+
+  // AMD64-ABI 3.5.7p5: Step 8. Fetch type from l->overflow_arg_area.
+  mlir::Value res = overflow_arg_area;
+
+  // AMD64-ABI 3.5.7p5: Step 9. Set l->overflow_arg_area to:
+  // l->overflow_arg_area + sizeof(type).
+  // AMD64-ABI 3.5.7p5: Step 10. Align l->overflow_arg_area upwards to
+  // an 8 byte boundary.
+  uint64_t sizeInBytes = datalayout.getTypeStoreSize(Ty).getFixedValue();
+  mlir::Value stride = builder.getSignedInt(loc, ((sizeInBytes + 7) & ~7), 32);
+  mlir::Value castedPtr =
+      builder.createPtrBitcast(overflow_arg_area, builder.getSIntNTy(8));
+  overflow_arg_area = builder.createPtrStride(loc, castedPtr, stride);
+  builder.createStore(loc, overflow_arg_area, overflow_arg_area_p);
+
+  return res;
+}
+
+mlir::Value LoweringPrepareX86CXXABI::lowerVAArgX86_64(
+    cir::CIRBaseBuilderTy &builder, cir::VAArgOp op,
+    const cir::CIRDataLayout &datalayout) {
+  // FIXME: return early since X86_64ABIInfo::classify can't handle these
+  // types. Let's hope LLVM's va_arg instruction can take care of it.
+  // Remove this when X86_64ABIInfo::classify can take care of every type.
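+  // Returning nullptr here keeps the cir.va_arg op as-is, so the later
+  // direct lowering to LLVM's va_arg instruction handles it instead.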
+  if (!mlir::isa<VoidType, IntType, SingleType, DoubleType, BoolType,
+                 StructType, LongDoubleType>(op.getType()))
+    return nullptr;
+
+  // Assume that va_list type is correct; should be pointer to LLVM type:
+  // struct {
+  //   i32 gp_offset;
+  //   i32 fp_offset;
+  //   i8* overflow_arg_area;
+  //   i8* reg_save_area;
+  // };
+  unsigned neededInt, neededSSE;
+
+  std::unique_ptr<cir::LowerModule> lowerModule = getLowerModule(op);
+  if (!lowerModule)
+    return nullptr;
+
+  mlir::Type ty = op.getType();
+
+  // FIXME: How should we access the X86AVXABILevel?
+  X86_64ABIInfo abiInfo(lowerModule->getTypes(), X86AVXABILevel::None);
+  ABIArgInfo ai = abiInfo.classifyArgumentType(
+      ty, 0, neededInt, neededSSE, /*isNamedArg=*/false, /*IsRegCall=*/false);
+
+  // Empty records are ignored for parameter passing purposes.
+  if (ai.isIgnore())
+    return nullptr;
+
+  mlir::Location loc = op.getLoc();
+  mlir::Value valist = op.getOperand();
+
+  // AMD64-ABI 3.5.7p5: Step 1. Determine whether type may be passed
+  // in the registers. If not go to step 7.
+  if (!neededInt && !neededSSE)
+    return builder.createLoad(
+        loc, builder.createPtrBitcast(buildX86_64VAArgFromMemory(
+                                          builder, datalayout, valist, ty, loc),
+                                      ty));
+
+  auto currentBlock = builder.getInsertionBlock();
+
+  // AMD64-ABI 3.5.7p5: Step 2. Compute num_gp to hold the number of
+  // general purpose registers needed to pass type and num_fp to hold
+  // the number of floating point registers needed.
+
+  // AMD64-ABI 3.5.7p5: Step 3. Verify whether arguments fit into
+  // registers. In the case: l->gp_offset > 48 - num_gp * 8 or
+  // l->fp_offset > 304 - num_fp * 16 go to step 7.
+  //
+  // NOTE: 304 is a typo; there are only (6 * 8 + 8 * 16) = 176 bytes of
+  // register save space.
+
+  mlir::Value inRegs;
+  mlir::Value gp_offset_p, fp_offset_p;
+  mlir::Value gp_offset, fp_offset;
+
+  if (neededInt) {
+    gp_offset_p = builder.createGetMemberOp(loc, valist, "gp_offset", 0);
+    gp_offset = builder.createLoad(loc, gp_offset_p);
+    inRegs = builder.getUnsignedInt(loc, 48 - neededInt * 8, 32);
+    inRegs = builder.createCompare(loc, cir::CmpOpKind::le, gp_offset, inRegs);
+  }
+
+  if (neededSSE) {
+    fp_offset_p = builder.createGetMemberOp(loc, valist, "fp_offset", 1);
+    fp_offset = builder.createLoad(loc, fp_offset_p);
+    mlir::Value fitsInFP =
+        builder.getUnsignedInt(loc, 176 - neededSSE * 16, 32);
+    fitsInFP =
+        builder.createCompare(loc, cir::CmpOpKind::le, fp_offset, fitsInFP);
+    inRegs = inRegs ? builder.createAnd(inRegs, fitsInFP) : fitsInFP;
+  }
+
+  mlir::Block *contBlock = currentBlock->splitBlock(op);
+  mlir::Block *inRegBlock = builder.createBlock(contBlock);
+  mlir::Block *inMemBlock = builder.createBlock(contBlock);
+
+  builder.setInsertionPointToEnd(currentBlock);
+  builder.create<cir::BrCondOp>(loc, inRegs, inRegBlock, inMemBlock);
+
+  // Emit code to load the value if it was passed in registers.
+  builder.setInsertionPointToStart(inRegBlock);
+
+  // AMD64-ABI 3.5.7p5: Step 4. Fetch type from l->reg_save_area with
+  // an offset of l->gp_offset and/or l->fp_offset. This may require
+  // copying to a temporary location in case the parameter is passed
+  // in different register classes or requires an alignment greater
+  // than 8 for general purpose registers and 16 for XMM registers.
+  //
+  // FIXME: This really results in shameful code when we end up needing to
+  // collect arguments from different places; often what should result in a
+  // simple assembling of a structure from scattered addresses has many more
+  // loads than necessary. Can we clean this up?
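+  // The register save area is laid out as the six general-purpose argument
+  // registers (6 * 8 = 48 bytes) followed by the eight SSE argument
+  // registers (8 * 16 = 128 bytes); gp_offset and fp_offset index into it.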
+  mlir::Value regSaveArea = builder.createLoad(
+      loc, builder.createGetMemberOp(loc, valist, "reg_save_area", 3));
+  mlir::Value regAddr;
+
+  uint64_t tyAlign = datalayout.getABITypeAlign(ty).value();
+  // The alignment of the result address.
+  uint64_t alignment = 0;
+  if (neededInt && neededSSE) {
+    // FIXME: Cleanup.
+    assert(ai.isDirect() && "Unexpected ABI info for mixed regs");
+    StructType structTy = mlir::cast<StructType>(ai.getCoerceToType());
+    cir::PointerType addrTy = builder.getPointerTo(ty);
+
+    mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                           CharUnits::fromQuantity(tyAlign));
+    tmp = builder.createPtrBitcast(tmp, structTy);
+    assert(structTy.getNumElements() == 2 &&
+           "Unexpected ABI info for mixed regs");
+    mlir::Type tyLo = structTy.getMembers()[0];
+    mlir::Type tyHi = structTy.getMembers()[1];
+    assert((isFPOrFPVectorTy(tyLo) ^ isFPOrFPVectorTy(tyHi)) &&
+           "Unexpected ABI info for mixed regs");
+    mlir::Value gpAddr = builder.createPtrStride(loc, regSaveArea, gp_offset);
+    mlir::Value fpAddr = builder.createPtrStride(loc, regSaveArea, fp_offset);
+    mlir::Value regLoAddr = isFPOrFPVectorTy(tyLo) ? fpAddr : gpAddr;
+    mlir::Value regHiAddr = isFPOrFPVectorTy(tyHi) ? gpAddr : fpAddr;
+
+    // Copy the first element.
+    // FIXME: Our choice of alignment here and below is probably pessimistic.
+    mlir::Value v = builder.createAlignedLoad(
+        loc, regLoAddr, datalayout.getABITypeAlign(tyLo).value());
+    builder.createStore(loc, v,
+                        builder.createGetMemberOp(loc, tmp, "gp_offset", 0));
+
+    // Copy the second element.
+    v = builder.createAlignedLoad(loc, regHiAddr,
+                                  datalayout.getABITypeAlign(tyHi).value());
+    builder.createStore(loc, v,
+                        builder.createGetMemberOp(loc, tmp, "fp_offset", 1));
+
+    tmp = builder.createPtrBitcast(tmp, ty);
+    regAddr = tmp;
+  } else if (neededInt || neededSSE == 1) {
+    uint64_t tySize = datalayout.getTypeStoreSize(ty).getFixedValue();
+
+    mlir::Type coTy;
+    if (ai.isDirect())
+      coTy = ai.getCoerceToType();
+
+    mlir::Value gpOrFpOffset = neededInt ? gp_offset : fp_offset;
+    alignment = neededInt ? 8 : 16;
+    uint64_t regSize = neededInt ? neededInt * 8 : 16;
+    // There are two cases that require special handling:
+    // 1)
+    // ```
+    // struct {
+    //   struct {} a[8];
+    //   int b;
+    // };
+    // ```
+    // The lower 8 bytes of the structure are not stored,
+    // so an 8-byte offset is needed when accessing the structure.
+    // 2)
+    // ```
+    // struct {
+    //   long long a;
+    //   struct {} b;
+    // };
+    // ```
+    // The stored size of this structure is smaller than its actual size,
+    // which may lead to reading past the end of the register save area.
+    if (coTy && (ai.getDirectOffset() == 8 || regSize < tySize)) {
+      cir::PointerType addrTy = builder.getPointerTo(ty);
+      mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                             CharUnits::fromQuantity(tyAlign));
+      mlir::Value addr =
+          builder.createPtrStride(loc, regSaveArea, gpOrFpOffset);
+      mlir::Value src = builder.createAlignedLoad(
+          loc, builder.createPtrBitcast(addr, coTy), tyAlign);
+      mlir::Value ptrOffset =
+          builder.getUnsignedInt(loc, ai.getDirectOffset(), 32);
+      mlir::Value dst = builder.createPtrStride(loc, tmp, ptrOffset);
+      builder.createStore(loc, src, dst);
+      regAddr = tmp;
+    } else {
+      regAddr = builder.createPtrStride(loc, regSaveArea, gpOrFpOffset);
+
+      // Copy into a temporary if the type is more aligned than the
+      // register save area.
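+      // (Only the general-purpose area needs this: it is only guaranteed
+      // 8-byte alignment, while the XMM slots are assumed 16-byte aligned
+      // in the neededSSE == 2 case below.)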
+      if (neededInt && tyAlign > 8) {
+        cir::PointerType addrTy = builder.getPointerTo(ty);
+        mlir::Value tmp = builder.createAlloca(
+            loc, addrTy, ty, "tmp", CharUnits::fromQuantity(tyAlign));
+        builder.createMemCpy(loc, tmp, regAddr,
+                             builder.getUnsignedInt(loc, tySize, 32));
+        regAddr = tmp;
+      }
+    }
+
+  } else {
+    assert(neededSSE == 2 && "Invalid number of needed registers!");
+    // SSE registers are spaced 16 bytes apart in the register save
+    // area, so we need to collect the two eightbytes together.
+    // The ABI isn't explicit about this, but it seems reasonable
+    // to assume that the slots are 16-byte aligned, since the stack is
+    // naturally 16-byte aligned and the prologue is expected to store
+    // all the SSE registers to the RSA.
+
+    mlir::Value regAddrLo =
+        builder.createPtrStride(loc, regSaveArea, fp_offset);
+    mlir::Value regAddrHi = builder.createPtrStride(
+        loc, regAddrLo, builder.getUnsignedInt(loc, 16, /*numBits=*/32));
+
+    mlir::MLIRContext *Context = abiInfo.getContext().getMLIRContext();
+    StructType structTy =
+        ai.canHaveCoerceToType()
+            ? mlir::cast<StructType>(ai.getCoerceToType())
+            : StructType::get(
+                  Context, {DoubleType::get(Context), DoubleType::get(Context)},
+                  /*packed=*/false, StructType::Struct);
+    cir::PointerType addrTy = builder.getPointerTo(ty);
+    mlir::Value tmp = builder.createAlloca(loc, addrTy, ty, "tmp",
+                                           CharUnits::fromQuantity(tyAlign));
+    tmp = builder.createPtrBitcast(tmp, structTy);
+    mlir::Value v = builder.createLoad(
+        loc, builder.createPtrBitcast(regAddrLo, structTy.getMembers()[0]));
+    builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 0));
+    v = builder.createLoad(
+        loc, builder.createPtrBitcast(regAddrHi, structTy.getMembers()[1]));
+    builder.createStore(loc, v, builder.createGetMemberOp(loc, tmp, "", 1));
+
+    tmp = builder.createPtrBitcast(tmp, ty);
+    regAddr = tmp;
+  }
+
+  // AMD64-ABI 3.5.7p5: Step 5. Set:
+  // l->gp_offset = l->gp_offset + num_gp * 8
+  // l->fp_offset = l->fp_offset + num_fp * 16.
+  if (neededInt) {
+    mlir::Value offset = builder.getUnsignedInt(loc, neededInt * 8, 32);
+    builder.createStore(loc, builder.createAdd(gp_offset, offset), gp_offset_p);
+  }
+
+  if (neededSSE) {
+    mlir::Value offset = builder.getUnsignedInt(loc, neededSSE * 16, 32);
+    builder.createStore(loc, builder.createAdd(fp_offset, offset), fp_offset_p);
+  }
+
+  builder.create<cir::BrOp>(loc, mlir::ValueRange{regAddr}, contBlock);
+
+  // Emit code to load the value if it was passed in memory.
+  builder.setInsertionPointToStart(inMemBlock);
+  mlir::Value memAddr =
+      buildX86_64VAArgFromMemory(builder, datalayout, valist, ty, loc);
+  builder.create<cir::BrOp>(loc, mlir::ValueRange{memAddr}, contBlock);
+
+  // Return the appropriate result.
+  builder.setInsertionPointToStart(contBlock);
+  mlir::Value res_addr = contBlock->addArgument(regAddr.getType(), loc);
+
+  return alignment ?
builder.createAlignedLoad( + loc, builder.createPtrBitcast(res_addr, ty), alignment) + : builder.createLoad(loc, builder.createPtrBitcast(res_addr, ty)); +} +} // namespace + +cir::LoweringPrepareCXXABI * +cir::LoweringPrepareCXXABI::createX86ABI(bool is64Bit) { + return new LoweringPrepareX86CXXABI(is64Bit); +} diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp index 3d590b3d499b..39bd1716aa3b 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86.cpp @@ -5,6 +5,7 @@ #include "LowerModule.h" #include "LowerTypes.h" #include "TargetInfo.h" +#include "X86_64ABIInfo.h" #include "clang/CIR/ABIArgInfo.h" #include "clang/CIR/Dialect/IR/CIRDataLayout.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" @@ -101,97 +102,6 @@ mlir::Type getFPTypeAtOffset(mlir::Type IRType, unsigned IROffset, } // namespace -class X86_64ABIInfo : public ABIInfo { - using Class = cir::X86ArgClass; - - /// Implement the X86_64 ABI merging algorithm. - /// - /// Merge an accumulating classification \arg Accum with a field - /// classification \arg Field. - /// - /// \param Accum - The accumulating classification. This should - /// always be either NoClass or the result of a previous merge - /// call. In addition, this should never be Memory (the caller - /// should just return Memory for the aggregate). - static Class merge(Class Accum, Class Field); - - /// Implement the X86_64 ABI post merging algorithm. - /// - /// Post merger cleanup, reduces a malformed Hi and Lo pair to - /// final MEMORY or SSE classes when necessary. - /// - /// \param AggregateSize - The size of the current aggregate in - /// the classification process. - /// - /// \param Lo - The classification for the parts of the type - /// residing in the low word of the containing object. - /// - /// \param Hi - The classification for the parts of the type - /// residing in the higher words of the containing object. - /// - void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; - - /// Determine the x86_64 register classes in which the given type T should be - /// passed. - /// - /// \param Lo - The classification for the parts of the type - /// residing in the low word of the containing object. - /// - /// \param Hi - The classification for the parts of the type - /// residing in the high word of the containing object. - /// - /// \param OffsetBase - The bit offset of this type in the - /// containing object. Some parameters are classified different - /// depending on whether they straddle an eightbyte boundary. - /// - /// \param isNamedArg - Whether the argument in question is a "named" - /// argument, as used in AMD64-ABI 3.5.7. - /// - /// \param IsRegCall - Whether the calling conversion is regcall. - /// - /// If a word is unused its result will be NoClass; if a type should - /// be passed in Memory then at least the classification of \arg Lo - /// will be Memory. - /// - /// The \arg Lo class will be NoClass iff the argument is ignored. - /// - /// If the \arg Lo class is ComplexX87, then the \arg Hi class will - /// also be ComplexX87. 
- void classify(mlir::Type T, uint64_t OffsetBase, Class &Lo, Class &Hi, - bool isNamedArg, bool IsRegCall = false) const; - - mlir::Type GetSSETypeAtOffset(mlir::Type IRType, unsigned IROffset, - mlir::Type SourceTy, - unsigned SourceOffset) const; - - mlir::Type GetINTEGERTypeAtOffset(mlir::Type DestTy, unsigned IROffset, - mlir::Type SourceTy, - unsigned SourceOffset) const; - - /// The 0.98 ABI revision clarified a lot of ambiguities, - /// unfortunately in ways that were not always consistent with - /// certain previous compilers. In particular, platforms which - /// required strict binary compatibility with older versions of GCC - /// may need to exempt themselves. - bool honorsRevision0_98() const { - return !getTarget().getTriple().isOSDarwin(); - } - - X86AVXABILevel AVXLevel; - -public: - X86_64ABIInfo(LowerTypes &CGT, X86AVXABILevel AVXLevel) - : ABIInfo(CGT), AVXLevel(AVXLevel) {} - - cir::ABIArgInfo classifyReturnType(mlir::Type RetTy) const; - - ABIArgInfo classifyArgumentType(mlir::Type Ty, unsigned freeIntRegs, - unsigned &neededInt, unsigned &neededSSE, - bool isNamedArg, bool IsRegCall) const; - - void computeInfo(LowerFunctionInfo &FI) const override; -}; - class X86_64TargetLoweringInfo : public TargetLoweringInfo { public: X86_64TargetLoweringInfo(LowerTypes &LM, X86AVXABILevel AVXLevel) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h new file mode 100644 index 000000000000..201730519207 --- /dev/null +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/Targets/X86_64ABIInfo.h @@ -0,0 +1,96 @@ +#include "ABIInfo.h" +#include "clang/CIR/Target/x86.h" + +namespace cir { +class X86_64ABIInfo : public cir::ABIInfo { + using Class = cir::X86ArgClass; + + /// Implement the X86_64 ABI merging algorithm. + /// + /// Merge an accumulating classification \arg Accum with a field + /// classification \arg Field. + /// + /// \param Accum - The accumulating classification. This should + /// always be either NoClass or the result of a previous merge + /// call. In addition, this should never be Memory (the caller + /// should just return Memory for the aggregate). + static Class merge(Class Accum, Class Field); + + /// Implement the X86_64 ABI post merging algorithm. + /// + /// Post merger cleanup, reduces a malformed Hi and Lo pair to + /// final MEMORY or SSE classes when necessary. + /// + /// \param AggregateSize - The size of the current aggregate in + /// the classification process. + /// + /// \param Lo - The classification for the parts of the type + /// residing in the low word of the containing object. + /// + /// \param Hi - The classification for the parts of the type + /// residing in the higher words of the containing object. + /// + void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; + + /// Determine the x86_64 register classes in which the given type T should be + /// passed. + /// + /// \param Lo - The classification for the parts of the type + /// residing in the low word of the containing object. + /// + /// \param Hi - The classification for the parts of the type + /// residing in the high word of the containing object. + /// + /// \param OffsetBase - The bit offset of this type in the + /// containing object. Some parameters are classified different + /// depending on whether they straddle an eightbyte boundary. + /// + /// \param isNamedArg - Whether the argument in question is a "named" + /// argument, as used in AMD64-ABI 3.5.7. 
+  ///
+  /// \param IsRegCall - Whether the calling convention is regcall.
+  ///
+  /// If a word is unused its result will be NoClass; if a type should
+  /// be passed in Memory then at least the classification of \arg Lo
+  /// will be Memory.
+  ///
+  /// The \arg Lo class will be NoClass iff the argument is ignored.
+  ///
+  /// If the \arg Lo class is ComplexX87, then the \arg Hi class will
+  /// also be ComplexX87.
+  void classify(mlir::Type T, uint64_t OffsetBase, Class &Lo, Class &Hi,
+                bool isNamedArg, bool IsRegCall = false) const;
+
+  mlir::Type GetSSETypeAtOffset(mlir::Type IRType, unsigned IROffset,
+                                mlir::Type SourceTy,
+                                unsigned SourceOffset) const;
+
+  mlir::Type GetINTEGERTypeAtOffset(mlir::Type DestTy, unsigned IROffset,
+                                    mlir::Type SourceTy,
+                                    unsigned SourceOffset) const;
+
+  /// The 0.98 ABI revision clarified a lot of ambiguities,
+  /// unfortunately in ways that were not always consistent with
+  /// certain previous compilers. In particular, platforms which
+  /// required strict binary compatibility with older versions of GCC
+  /// may need to exempt themselves.
+  bool honorsRevision0_98() const {
+    return !getTarget().getTriple().isOSDarwin();
+  }
+
+  X86AVXABILevel AVXLevel;
+
+public:
+  X86_64ABIInfo(LowerTypes &CGT, cir::X86AVXABILevel AVXLevel)
+      : ABIInfo(CGT), AVXLevel(AVXLevel) {}
+
+  cir::ABIArgInfo classifyReturnType(mlir::Type RetTy) const;
+
+  cir::ABIArgInfo classifyArgumentType(mlir::Type Ty, unsigned freeIntRegs,
+                                       unsigned &neededInt, unsigned &neededSSE,
+                                       bool isNamedArg, bool IsRegCall) const;
+
+  void computeInfo(LowerFunctionInfo &FI) const override;
+};
+
+} // namespace cir
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index a5d6a69693ae..f3a2eb1aa244 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -4305,6 +4305,37 @@ class CIRIsFPClassOpLowering
   }
 };

+class CIRPtrMaskOpLowering
+    : public mlir::OpConversionPattern<cir::PtrMaskOp> {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::PtrMaskOp op, OpAdaptor adaptor,
+                  mlir::ConversionPatternRewriter &rewriter) const override {
+    // FIXME: Lower this to an mlir::LLVM::PtrMaskOp once such an op exists.
+    // For now we emit the equivalent sequence manually, following:
+    // https://llvm.org/docs/LangRef.html#llvm-ptrmask-intrinsic
+    auto loc = op.getLoc();
+    auto mask = op.getMask();
+
+    auto moduleOp = op->getParentOfType<mlir::ModuleOp>();
+    mlir::DataLayout layout(moduleOp);
+    auto iPtrIdxValue = layout.getTypeSizeInBits(mask.getType());
+    auto iPtrIdx = mlir::IntegerType::get(moduleOp->getContext(), iPtrIdxValue);
+
+    auto intPtr = rewriter.create<mlir::LLVM::PtrToIntOp>(
+        loc, iPtrIdx, adaptor.getPtr()); // this may truncate
+    mlir::Value masked =
+        rewriter.create<mlir::LLVM::AndOp>(loc, intPtr, adaptor.getMask());
+    // Offset from the original address to the masked address, so the
+    // byte-wise GEP below yields the masked pointer.
+    mlir::Value diff = rewriter.create<mlir::LLVM::SubOp>(loc, masked, intPtr);
+    rewriter.replaceOpWithNewOp<mlir::LLVM::GEPOp>(
+        op, getTypeConverter()->convertType(op.getType()),
+        mlir::IntegerType::get(moduleOp->getContext(), 8), adaptor.getPtr(),
+        diff);
+    return mlir::success();
+  }
+};
+
 class CIRAbsOpLowering : public mlir::OpConversionPattern<cir::AbsOp> {
 public:
   using OpConversionPattern::OpConversionPattern;
@@ -4398,7 +4429,8 @@ void populateCIRToLLVMConversionPatterns(
       CIRAssumeLowering, CIRAssumeAlignedLowering, CIRAssumeSepStorageLowering,
       CIRBaseClassAddrOpLowering, CIRDerivedClassAddrOpLowering,
       CIRVTTAddrPointOpLowering, CIRIsFPClassOpLowering, CIRAbsOpLowering,
-      CIRMemMoveOpLowering, CIRMemsetOpLowering, CIRSignBitOpLowering
+      CIRMemMoveOpLowering, CIRMemsetOpLowering, CIRSignBitOpLowering,
+      CIRPtrMaskOpLowering
 #define GET_BUILTIN_LOWERING_LIST
 #include "clang/CIR/Dialect/IR/CIRBuiltinsLowering.inc"
 #undef GET_BUILTIN_LOWERING_LIST
diff --git a/clang/test/CIR/Lowering/var-arg-x86_64.c b/clang/test/CIR/Lowering/var-arg-x86_64.c
new file mode 100644
index 000000000000..992d5e82cd98
--- /dev/null
+++ b/clang/test/CIR/Lowering/var-arg-x86_64.c
@@ -0,0 +1,78 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir -fno-clangir-call-conv-lowering %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -fno-clangir-call-conv-lowering %s -o %t.ll
+// RUN: FileCheck --input-file=%t.ll %s
+
+#include <stdarg.h>
+
+double f1(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  double res = va_arg(valist, double);
+  va_end(valist);
+  return res;
+}
+
+// CHECK: [[VA_LIST_TYPE:%.+]] = type { i32, i32, ptr, ptr }
+
+// CHECK: define {{.*}}@f1
+// CHECK: [[VA_LIST_ALLOCA:%.+]] = alloca {{.*}}[[VA_LIST_TYPE]]
+// CHECK: [[VA_LIST:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_start.p0(ptr [[VA_LIST]])
+// CHECK: [[VA_LIST2:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: [[FP_OFFSET_P:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 1
+// CHECK: [[FP_OFFSET:%.+]] = load {{.*}}, ptr [[FP_OFFSET_P]]
+// CHECK: [[COMPARED:%.+]] = icmp ule i32 {{.*}}, 160
+// CHECK: br i1 [[COMPARED]], label %[[THEN_BB:.+]], label %[[ELSE_BB:.+]],
+//
+// CHECK: [[THEN_BB]]:
+// CHECK: [[UPDATED_FP_OFFSET:%.+]] = add i32 [[FP_OFFSET]], 16
+// CHECK: store i32 [[UPDATED_FP_OFFSET]], ptr [[FP_OFFSET_P]]
+// CHECK: br label %[[CONT_BB:.+]],
+//
+// CHECK: [[ELSE_BB]]:
+// CHECK: [[OVERFLOW_ARG_AREA_ADDR:%.+]] = getelementptr {{.*}} [[VA_LIST2]], i32 0, i32 2
+// CHECK: [[OVERFLOW_ARG_AREA:%.+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK: [[OVERFLOW_ARG_AREA_OFFSET:%.+]] = getelementptr {{.*}} [[OVERFLOW_ARG_AREA]], i64 8
+// CHECK: store ptr [[OVERFLOW_ARG_AREA_OFFSET]], ptr [[OVERFLOW_ARG_AREA_ADDR]]
+// CHECK: br label %[[CONT_BB]]
+//
+// CHECK: [[CONT_BB]]:
+// CHECK: [[VA_LIST3:%.+]] = getelementptr {{.*}} [[VA_LIST_ALLOCA]], i32 0
+// CHECK: call {{.*}}@llvm.va_end.p0(ptr [[VA_LIST3]])
+
+// CIR: cir.func @f1
+// CIR: [[VA_LIST_ALLOCA:%.+]] = cir.alloca !cir.array,
+// CIR: [[RES:%.+]] = cir.alloca !cir.double, !cir.ptr<!cir.double>, ["res",
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: cir.va.start [[VASTED_VA_LIST]]
+// CIR: [[VASTED_VA_LIST:%.+]] = cir.cast(array_to_ptrdecay, [[VA_LIST_ALLOCA]]
+// CIR: [[FP_OFFSET_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][1] {name = "fp_offset"}
+// CIR: [[FP_OFFSET:%.+]] = cir.load [[FP_OFFSET_P]]
+// CIR: [[OFFSET_CONSTANT:%.+]] = cir.const #cir.int<160>
+// CIR: [[CMP:%.+]] = cir.cmp(le, [[FP_OFFSET]], [[OFFSET_CONSTANT]])
+// CIR: cir.brcond [[CMP]] ^[[InRegBlock:.+]], ^[[InMemBlock:.+]] loc
+//
+// CIR: ^[[InRegBlock]]:
+// CIR: [[REG_SAVE_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][3] {name = "reg_save_area"}
+// CIR: [[REG_SAVE_AREA:%.+]] = cir.load [[REG_SAVE_AREA_P]]
+// CIR: [[UPDATED:%.+]] = cir.ptr_stride([[REG_SAVE_AREA]] {{.*}}, [[FP_OFFSET]]
+// CIR: [[CONSTANT:%.+]] = cir.const #cir.int<16>
+// CIR: [[ADDED:%.+]] = cir.binop(add, [[FP_OFFSET]], [[CONSTANT]])
+// CIR: cir.store [[ADDED]], [[FP_OFFSET_P]]
+// CIR: cir.br ^[[ContBlock:.+]]([[UPDATED]]
+//
+// CIR: ^[[InMemBlock]]:
+// CIR: [[OVERFLOW_ARG_AREA_P:%.+]] = cir.get_member [[VASTED_VA_LIST]][2] {name = "overflow_arg_area"}
+// CIR: [[OVERFLOW_ARG_AREA:%.+]] = cir.load [[OVERFLOW_ARG_AREA_P]]
+// CIR: [[OFFSET:%.+]] = cir.const #cir.int<8>
+// CIR: [[CASTED:%.+]] = cir.cast(bitcast, [[OVERFLOW_ARG_AREA]] : !cir.ptr)
+// CIR: [[NEW_VALUE:%.+]] = cir.ptr_stride([[CASTED]] : !cir.ptr, [[OFFSET]]
+// CIR: [[CASTED_P:%.+]] = cir.cast(bitcast, [[OVERFLOW_ARG_AREA_P]] : !cir.ptr>)
+// CIR: store [[NEW_VALUE]], [[CASTED_P]]
+// CIR: cir.br ^[[ContBlock]]([[OVERFLOW_ARG_AREA]]
+//
+// CIR: ^[[ContBlock]]([[ARG:.+]]: !cir.ptr
+// CIR: [[CASTED_ARG_P:%.+]] = cir.cast(bitcast, [[ARG]]
+// CIR: [[CASTED_ARG:%.+]] = cir.load align(16) [[CASTED_ARG_P]]
+// CIR: store [[CASTED_ARG]], [[RES]]
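+
+// A possible companion test (a sketch, not verified against the
+// implementation; CHECK lines are intentionally omitted): a struct whose two
+// eightbytes classify as { INTEGER, SSE } would exercise the
+// "neededInt && neededSSE" mixed-register path in lowerVAArgX86_64, which
+// reassembles the value from the GP and XMM halves of the register save area.
+struct intdouble {
+  long i;   // first eightbyte  -> INTEGER class (general-purpose register)
+  double d; // second eightbyte -> SSE class (XMM register)
+};
+
+double f2(int n, ...) {
+  va_list valist;
+  va_start(valist, n);
+  struct intdouble id = va_arg(valist, struct intdouble);
+  va_end(valist);
+  return id.i + id.d;
+}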