diff --git a/clang/docs/AllocToken.rst b/clang/docs/AllocToken.rst index b65e18ccfa967..1a740e5e22c29 100644 --- a/clang/docs/AllocToken.rst +++ b/clang/docs/AllocToken.rst @@ -49,6 +49,39 @@ change or removal. These may (experimentally) be selected with ``-Xclang * ``increment``: This mode assigns a simple, incrementally increasing token ID to each allocation site. +The following command-line options affect generated token IDs: + +* ``-falloc-token-max=`` + Configures the maximum number of tokens. No max by default (tokens bounded + by ``SIZE_MAX``). + +Querying Token IDs with ``__builtin_infer_alloc_token`` +======================================================= + +For use cases where the token ID must be known at compile time, Clang provides +a builtin function: + +.. code-block:: c + + size_t __builtin_infer_alloc_token(, ...); + +This builtin returns the token ID inferred from its argument expressions, which +mirror arguments normally passed to any allocation function. The argument +expressions are **unevaluated**, so it can be used with expressions that would +have side effects without any runtime impact. + +For example, it can be used as follows: + +.. code-block:: c + + struct MyType { ... }; + void *__partition_alloc(size_t size, size_t partition); + #define partition_alloc(...) __partition_alloc(__VA_ARGS__, __builtin_infer_alloc_token(__VA_ARGS__)) + + void foo(void) { + MyType *x = partition_alloc(sizeof(*x)); + } + Allocation Token Instrumentation ================================ @@ -70,16 +103,6 @@ example: // Instrumented: ptr = __alloc_token_malloc(size, ); -The following command-line options affect generated token IDs: - -* ``-falloc-token-max=`` - Configures the maximum number of tokens. No max by default (tokens bounded - by ``SIZE_MAX``). - - .. code-block:: console - - % clang++ -fsanitize=alloc-token -falloc-token-max=512 example.cc - Runtime Interface ----------------- diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index db2b0f6fd5027..5bd3a071ee33e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -268,6 +268,9 @@ Non-comprehensive list of changes in this release allocator-level heap organization strategies. A feature to instrument all allocation functions with a token ID can be enabled via the ``-fsanitize=alloc-token`` flag. +- A builtin ``__builtin_infer_alloc_token(, ...)`` is provided to allow + compile-time querying of allocation token IDs, where the builtin arguments + mirror those normally passed to an allocation function. New Compiler Flags ------------------ diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 23ad11ac9f792..2be61f2479e44 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -803,16 +803,6 @@ static void addSanitizers(const Triple &TargetTriple, MPM.addPass(DataFlowSanitizerPass(LangOpts.NoSanitizeFiles, PB.getVirtualFileSystemPtr())); } - - if (LangOpts.Sanitize.has(SanitizerKind::AllocToken)) { - if (Level == OptimizationLevel::O0) { - // The default pass builder only infers libcall function attrs when - // optimizing, so we insert it here because we need it for accurate - // memory allocation function detection. - MPM.addPass(InferFunctionAttrsPass()); - } - MPM.addPass(AllocTokenPass(getAllocTokenOptions(LangOpts, CodeGenOpts))); - } }; if (ClSanitizeOnOptimizerEarlyEP) { PB.registerOptimizerEarlyEPCallback( @@ -855,6 +845,23 @@ static void addSanitizers(const Triple &TargetTriple, } } +static void addAllocTokenPass(const Triple &TargetTriple, + const CodeGenOptions &CodeGenOpts, + const LangOptions &LangOpts, PassBuilder &PB) { + PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM, + OptimizationLevel Level, + ThinOrFullLTOPhase) { + if (Level == OptimizationLevel::O0 && + LangOpts.Sanitize.has(SanitizerKind::AllocToken)) { + // The default pass builder only infers libcall function attrs when + // optimizing, so we insert it here because we need it for accurate + // memory allocation function detection with -fsanitize=alloc-token. + MPM.addPass(InferFunctionAttrsPass()); + } + MPM.addPass(AllocTokenPass(getAllocTokenOptions(LangOpts, CodeGenOpts))); + }); +} + void EmitAssemblyHelper::RunOptimizationPipeline( BackendAction Action, std::unique_ptr &OS, std::unique_ptr &ThinLinkOS, BackendConsumer *BC) { @@ -1109,6 +1116,7 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (!IsThinLTOPostLink) { addSanitizers(TargetTriple, CodeGenOpts, LangOpts, PB); addKCFIPass(TargetTriple, LangOpts, PB); + addAllocTokenPass(TargetTriple, CodeGenOpts, LangOpts, PB); } if (std::optional Options = diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 9ee810c9d5775..8a85789a256d5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4525,6 +4525,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(AI); } + case Builtin::BI__builtin_infer_alloc_token: { + llvm::MDNode *MDN = buildAllocToken(E); + llvm::Value *MDV = MetadataAsValue::get(getLLVMContext(), MDN); + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::alloc_token_id, {IntPtrTy}); + llvm::CallBase *TokenID = Builder.CreateCall(F, MDV); + return RValue::get(TokenID); + } + case Builtin::BIbzero: case Builtin::BI__builtin_bzero: { Address Dest = EmitPointerWithAlignment(E->getArg(0)); diff --git a/clang/test/CodeGen/lto-newpm-pipeline.c b/clang/test/CodeGen/lto-newpm-pipeline.c index ea9784a76f923..dceaaf136ebfc 100644 --- a/clang/test/CodeGen/lto-newpm-pipeline.c +++ b/clang/test/CodeGen/lto-newpm-pipeline.c @@ -32,10 +32,12 @@ // CHECK-FULL-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-FULL-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CoroConditionalWrapper +// CHECK-FULL-O0-NEXT: Running pass: AllocTokenPass +// CHECK-FULL-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-FULL-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-FULL-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-FULL-O0-NEXT: Running pass: AnnotationRemarksPass -// CHECK-FULL-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-FULL-O0-NEXT: Running pass: VerifierPass // CHECK-FULL-O0-NEXT: Running pass: BitcodeWriterPass @@ -46,10 +48,12 @@ // CHECK-THIN-O0-NEXT: Running pass: AlwaysInlinerPass // CHECK-THIN-O0-NEXT: Running analysis: ProfileSummaryAnalysis // CHECK-THIN-O0-NEXT: Running pass: CoroConditionalWrapper +// CHECK-THIN-O0-NEXT: Running pass: AllocTokenPass +// CHECK-THIN-O0-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis +// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-THIN-O0-NEXT: Running pass: CanonicalizeAliasesPass // CHECK-THIN-O0-NEXT: Running pass: NameAnonGlobalPass // CHECK-THIN-O0-NEXT: Running pass: AnnotationRemarksPass -// CHECK-THIN-O0-NEXT: Running analysis: TargetLibraryAnalysis // CHECK-THIN-O0-NEXT: Running pass: VerifierPass // CHECK-THIN-O0-NEXT: Running pass: ThinLTOBitcodeWriterPass diff --git a/clang/test/CodeGenCXX/alloc-token-builtin.cpp b/clang/test/CodeGenCXX/alloc-token-builtin.cpp new file mode 100644 index 0000000000000..2d0b8e1666faf --- /dev/null +++ b/clang/test/CodeGenCXX/alloc-token-builtin.cpp @@ -0,0 +1,77 @@ +// To test IR generation of the builtin without evaluating the LLVM intrinsic, +// we set the mode to a stateful mode, which prohibits constant evaluation. +// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-mode=random -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-CODEGEN +// RUN: %clang_cc1 -triple x86_64-linux-gnu -Werror -std=c++20 -emit-llvm -falloc-token-max=2 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LOWER + +extern "C" void *my_malloc(unsigned long, unsigned long); + +struct NoPtr { + int x; + long y; +}; + +struct WithPtr { + int a; + char *buf; +}; + +int unevaluated_fn(); + +// CHECK-LABEL: @_Z16test_builtin_intv( +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]]) +// CHECK-LOWER: ret i64 0 +unsigned long test_builtin_int() { + return __builtin_infer_alloc_token(sizeof(1)); +} + +// CHECK-LABEL: @_Z16test_builtin_ptrv( +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_PTR:[0-9]+]]) +// CHECK-LOWER: ret i64 1 +unsigned long test_builtin_ptr() { + return __builtin_infer_alloc_token(sizeof(int *)); +} + +// CHECK-LABEL: @_Z25test_builtin_struct_noptrv( +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_NOPTR:[0-9]+]]) +// CHECK-LOWER: ret i64 0 +unsigned long test_builtin_struct_noptr() { + return __builtin_infer_alloc_token(sizeof(NoPtr)); +} + +// CHECK-LABEL: @_Z25test_builtin_struct_w_ptrv( +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_WITHPTR:[0-9]+]]) +// CHECK-LOWER: ret i64 1 +unsigned long test_builtin_struct_w_ptr() { + return __builtin_infer_alloc_token(sizeof(WithPtr), 123); +} + +// CHECK-LABEL: @_Z24test_builtin_unevaluatedv( +// CHECK-NOT: call{{.*}}unevaluated_fn +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_INT:[0-9]+]]) +// CHECK-LOWER: ret i64 0 +unsigned long test_builtin_unevaluated() { + return __builtin_infer_alloc_token(sizeof(int) * unevaluated_fn()); +} + +// CHECK-LABEL: @_Z36test_builtin_unsequenced_unevaluatedi( +// CHECK: add nsw +// CHECK-NOT: add nsw +// CHECK-CODEGEN: %[[REG:[0-9]+]] = call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]]) +// CHECK-CODEGEN: call{{.*}}@my_malloc({{.*}}, i64 noundef %[[REG]]) +// CHECK-LOWER: call{{.*}}@my_malloc({{.*}}, i64 noundef 0) +void test_builtin_unsequenced_unevaluated(int x) { + my_malloc(++x, __builtin_infer_alloc_token(++x)); +} + +// CHECK-LABEL: @_Z20test_builtin_unknownv( +// CHECK-CODEGEN: call i64 @llvm.alloc.token.id.i64(metadata ![[META_UNKNOWN:[0-9]+]]) +// CHECK-LOWER: ret i64 0 +unsigned long test_builtin_unknown() { + return __builtin_infer_alloc_token(4096); +} + +// CHECK-CODEGEN: ![[META_INT]] = !{!"int", i1 false} +// CHECK-CODEGEN: ![[META_PTR]] = !{!"int *", i1 true} +// CHECK-CODEGEN: ![[META_NOPTR]] = !{!"NoPtr", i1 false} +// CHECK-CODEGEN: ![[META_WITHPTR]] = !{!"WithPtr", i1 true} +// CHECK-CODEGEN: ![[META_UNKNOWN]] = !{}