[InferAlignment] Increase alignment in masked load / store intrinsics if known #156057

jhuber6 · 2025-08-29T16:36:27Z

Summary:
The masked load / store LLVM intrinsics take an argument for the
alignment. If the user is pessimistic about alignment they can provide a
value of 1 for an unaligned load. This patch updates infer-alignment to
raise the alignment argument when the pointer's known alignment is
greater than the provided one.

Ignoring the gather / scatter versions for now since they contain many
pointers.

llvmbot · 2025-08-29T16:37:03Z

@llvm/pr-subscribers-llvm-transforms

Author: Joseph Huber (jhuber6)

Changes

Summary:
The masked load / store LLVM intrinsics take an argument for the
alignment. If the user is pessimistic about alignment they can provide a
value of 1 for an unaligned load. This patch updates instcombine to
increase the alignment value of the alignment argument if it is known
greater than the provided one.

Ignoring the gather / scatter versions for now since they contain many
pointers.

Full diff: https://github.com/llvm/llvm-project/pull/156057.diff

3 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+34-14)
(modified) llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll (+1-1)
(modified) llvm/test/Transforms/InstCombine/masked_intrinsics.ll (+31)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 42b65dde67255..7e50e55ae24c8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -288,8 +288,11 @@ Instruction *InstCombinerImpl::SimplifyAnyMemSet(AnyMemSetInst *MI) {
 // * Narrow width by halfs excluding zero/undef lanes
 Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
   Value *LoadPtr = II.getArgOperand(0);
-  const Align Alignment =
-      cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+  Align Alignment = cast<ConstantInt>(II.getArgOperand(1))->getAlignValue();
+
+  Align KnownAlign = getKnownAlignment(LoadPtr, DL, &II, &AC, &DT);
+  if (Alignment < KnownAlign)
+    Alignment = KnownAlign;
 
   // If the mask is all ones or undefs, this is a plain vector load of the 1st
   // argument.
@@ -310,6 +313,15 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
     return Builder.CreateSelect(II.getArgOperand(2), LI, II.getArgOperand(3));
   }
 
+  // Update the alignment if the known value is greater than the provided one.
+  if (cast<ConstantInt>(II.getArgOperand(1))->getAlignValue() < Alignment) {
+    SmallVector<Value *> Args(II.arg_begin(), II.arg_end());
+    Args[1] = Builder.getInt32(Alignment.value());
+    CallInst *CI = Builder.CreateCall(II.getCalledFunction(), Args);
+    CI->copyMetadata(II);
+    return CI;
+  }
+
   return nullptr;
 }
 
@@ -317,33 +329,41 @@ Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
 // * Single constant active lane -> store
 // * Narrow width by halfs excluding zero/undef lanes
 Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
-  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
-  if (!ConstMask)
-    return nullptr;
+  Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
+
+  Align KnownAlign = getKnownAlignment(II.getArgOperand(1), DL, &II, &AC, &DT);
+  if (Alignment < KnownAlign)
+    Alignment = KnownAlign;
 
   // If the mask is all zeros, this instruction does nothing.
-  if (ConstMask->isNullValue())
+  auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(3));
+  if (ConstMask && ConstMask->isNullValue())
     return eraseInstFromFunction(II);
 
   // If the mask is all ones, this is a plain vector store of the 1st argument.
-  if (ConstMask->isAllOnesValue()) {
+  if (ConstMask && ConstMask->isAllOnesValue()) {
     Value *StorePtr = II.getArgOperand(1);
-    Align Alignment = cast<ConstantInt>(II.getArgOperand(2))->getAlignValue();
     StoreInst *S =
         new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
     S->copyMetadata(II);
     return S;
   }
 
-  if (isa<ScalableVectorType>(ConstMask->getType()))
+  if (ConstMask && isa<ScalableVectorType>(ConstMask->getType()))
     return nullptr;
 
   // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
-  APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
-  APInt PoisonElts(DemandedElts.getBitWidth(), 0);
-  if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
-                                            PoisonElts))
-    return replaceOperand(II, 0, V);
+  if (ConstMask) {
+    APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
+    APInt PoisonElts(DemandedElts.getBitWidth(), 0);
+    if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
+                                              PoisonElts))
+      return replaceOperand(II, 0, V);
+  }
+
+  // Update the alignment if the known value is greater than the provided one.
+  if (cast<ConstantInt>(II.getArgOperand(2))->getAlignValue() < Alignment)
+    return replaceOperand(II, 2, Builder.getInt32(Alignment.value()));
 
   return nullptr;
 }
diff --git a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
index 918ea605a10bf..6ba52c178b8d4 100644
--- a/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
+++ b/llvm/test/Transforms/InstCombine/load-store-masked-constant-array.ll
@@ -7,7 +7,7 @@
 define void @combine_masked_load_store_from_constant_array(ptr %ptr) {
 ; CHECK-LABEL: @combine_masked_load_store_from_constant_array(
 ; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.whilelt.nxv2i1.i32(i32 0, i32 10)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 8, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64.p0(ptr nonnull @contant_int_array, i32 16, <vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> zeroinitializer)
 ; CHECK-NEXT:    call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP2]], ptr [[PTR:%.*]], i32 1, <vscale x 2 x i1> [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
index 8f7683419a82a..6c1168e6c1c70 100644
--- a/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -439,3 +439,34 @@ define <2 x i64> @negative_gather_v2i64_uniform_ptrs_no_all_active_mask(ptr %src
 declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
 
+; Alignment tests
+
+define <2 x i32> @unaligned_load(<2 x i1> %mask, ptr %ptr) {
+; CHECK-LABEL: @unaligned_load(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 64) ]
+; CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr [[PTR]], i32 64, <2 x i1> [[MASK:%.*]], <2 x i32> poison)
+; CHECK-NEXT:    ret <2 x i32> [[MASKED_LOAD]]
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 64) ]
+  %masked_load = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %ptr, i32 1, <2 x i1> %mask, <2 x i32> poison)
+  ret <2 x i32> %masked_load
+}
+
+define void @unaligned_store(<2 x i1> %mask, <2 x i32> %val, ptr %ptr) {
+; CHECK-LABEL: @unaligned_store(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 64) ]
+; CHECK-NEXT:    tail call void @llvm.masked.store.v2i32.p0(<2 x i32> [[VAL:%.*]], ptr [[PTR]], i32 64, <2 x i1> [[MASK:%.*]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 64) ]
+  tail call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %ptr, i32 1, <2 x i1> %mask)
+  ret void
+}
+
+declare void @llvm.assume(i1)
+declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>)

nikic

This should happen in the InferAlignment pass instead.

nikic · 2025-08-29T17:53:04Z

As a side note, we should really replace this argument with an align attribute on the pointer parameter...

jhuber6 · 2025-08-29T18:34:18Z

> As a side note, we should really replace this argument with an align attribute on the pointer parameter...

Yeah, I was wondering why it was needed since the compress and expand variants don't use it. I'm guessing that would need some auto-upgrade handling or something.

…s if known Summary: The masked load / store LLVM intrinsics take an argument for the alignment. If the user is pessimistic about alignment they can provide a value of `1` for an unaligned load. This patch updates infer-alignment to increase the alignment value of the alignment argument if it is known greater than the provided one. Ignoring the gather / scatter versions for now since they contain many pointers.

jhuber6 · 2025-08-29T19:15:58Z

> This should happen in the InferAlignment pass instead.

Done.

llvm/lib/Transforms/Scalar/InferAlignment.cpp

nikic · 2025-08-30T20:54:35Z

> > As a side note, we should really replace this argument with an align attribute on the pointer parameter...
>
> Yeah, I was wondering why it was needed since the compress and expand variants don't use it. I'm guessing that would need some auto-upgrade handling or something.

Right. We've done this change for the memcpy etc intrinsics in the past:

llvm-project/llvm/lib/IR/AutoUpgrade.cpp

Lines 5100 to 5133 in d39772c

    
           case Intrinsic::memcpy: 
        
           case Intrinsic::memmove: 
        
           case Intrinsic::memset: { 
        
             // We have to make sure that the call signature is what we're expecting. 
        
             // We only want to change the old signatures by removing the alignment arg: 
        
             //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1) 
        
             //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1) 
        
             //  @llvm.memset...(i8*, i8, i[32|64], i32, i1) 
        
             //    -> @llvm.memset...(i8*, i8, i[32|64], i1) 
        
             // Note: i8*'s in the above can be any pointer type 
        
             if (CI->arg_size() != 5) { 
        
               DefaultCase(); 
        
               return; 
        
             } 
        
             // Remove alignment argument (3), and add alignment attributes to the 
        
             // dest/src pointers. 
        
             Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1), 
        
                               CI->getArgOperand(2), CI->getArgOperand(4)}; 
        
             NewCall = Builder.CreateCall(NewFn, Args); 
        
             AttributeList OldAttrs = CI->getAttributes(); 
        
             AttributeList NewAttrs = AttributeList::get( 
        
                 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(), 
        
                 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1), 
        
                  OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)}); 
        
             NewCall->setAttributes(NewAttrs); 
        
             auto *MemCI = cast<MemIntrinsic>(NewCall); 
        
             // All mem intrinsics support dest alignment. 
        
             const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3)); 
        
             MemCI->setDestAlignment(Align->getMaybeAlignValue()); 
        
             // Memcpy/Memmove also support source alignment. 
        
             if (auto *MTI = dyn_cast<MemTransferInst>(MemCI)) 
        
               MTI->setSourceAlignment(Align->getMaybeAlignValue()); 
        
             break; 
        
           }

nikic

LGTM

llvm/lib/Transforms/Scalar/InferAlignment.cpp

RKSimon

LGTM with one very petty minor - cheers!

RKSimon · 2025-08-31T09:58:53Z

llvm/lib/Transforms/Scalar/InferAlignment.cpp

-  // TODO: Also handle memory intrinsics.
-  return false;
+
+  IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);


(style) auto *II = dyn_cast<IntrinsicInst>(I);

llvm-ci · 2025-09-02T11:13:54Z

LLVM Buildbot has detected a new failure on builder cross-project-tests-sie-ubuntu-dwarf5 running on doug-worker-1b while building llvm at step 6 "test-build-unified-tree-check-cross-project".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/163/builds/25722

Here is the relevant piece of the build log for the reference

Step 6 (test-build-unified-tree-check-cross-project) failure: test (failure)
******************** TEST 'cross-project-tests :: debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp' FAILED ********************
Exit Code: 2

Command Output (stderr):
--
clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp # RUN: at line 10
+ clang++ -O0 -glldb -std=gnu++11 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp -o /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp
"/usr/bin/python3.10" "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py" test --fail-lt 1.0 -w --debugger lldb-dap --lldb-executable "/home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap" --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp | /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp # RUN: at line 11
+ /usr/bin/python3.10 /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/dexter.py test --fail-lt 1.0 -w --debugger lldb-dap --lldb-executable /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/lldb-dap --binary /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/projects/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/Output/float_range_no_arg.cpp.tmp -- /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp
+ /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/build/bin/FileCheck /home/buildbot/buildbot-root/cross-project-tests-sie-ubuntu-dwarf5/llvm-project/cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/float_range_watch/float_range_no_arg.cpp


****************************************

alexfh · 2025-09-22T10:35:49Z

We've started seeing clang crashes in the LLVM verifier accompanied by "masked_store: alignment must be a power of 2" errors. Is it a known issue? I'm working on a test case.

alexfh · 2025-09-22T10:45:20Z

@jhuber6 I came up with a reduced test case: https://gcc.godbolt.org/z/Pvffo4e8G

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define <2 x double> @quux() #0 {
bb:
  %call = call <2 x i64> @snork()
  %call1 = call <2 x double> @hoge(<2 x i64> %call)
  ret <2 x double> %call1
}

define <2 x double> @hoge(<2 x i64> %arg) #0 {
bb:
  %call = call <2 x double> @llvm.x86.avx.maskload.pd(ptr null, <2 x i64> %arg)
  ret <2 x double> %call
}

define <2 x i64> @snork() {
bb:
  %call = call <2 x i64> @hoge.1()
  %call1 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %call)
  ret <2 x i64> %call1
}

; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: read)
declare <2 x double> @llvm.x86.avx.maskload.pd(ptr, <2 x i64>) #1

define <2 x i64> @hoge.1() {
bb:
  %call = call <2 x i64> @blam(i64 -1)
  ret <2 x i64> %call
}

define <2 x i64> @blam(i64 %arg) {
bb:
  %insertelement = insertelement <2 x i64> zeroinitializer, i64 %arg, i64 0
  ret <2 x i64> %insertelement
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) #2

attributes #0 = { "target-features"="+avx" }
attributes #1 = { nocallback nofree nosync nounwind willreturn memory(argmem: read) }
attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

masked_load: alignment must be a power of 2
  %call.i = tail call <2 x double> @llvm.masked.load.v2f64.p0(ptr null, i32 0, <2 x i1> <i1 true, i1 false>, <2 x double> <double poison, double 0.000000e+00>)
in function quux
fatal error: error in backend: Broken function found, compilation aborted!
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0.	Program arguments: /opt/compiler-explorer/clang-assertions-trunk/bin/clang++ -g -o /app/output.s -mllvm --x86-asm-syntax=intel -fno-verbose-asm -S --gcc-toolchain=/opt/compiler-explorer/gcc-snapshot -fcolor-diagnostics -fno-crash-diagnostics -x ir -O3 <source>
1.	Code generation
2.	Running pass 'Function Pass Manager' on module '<source>'.
3.	Running pass 'Module Verifier' on function '@quux'
 #0 0x0000000004193688 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4193688)
 #1 0x0000000004190ab4 llvm::sys::CleanupOnSignal(unsigned long) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4190ab4)
 #2 0x00000000040d5586 llvm::CrashRecoveryContext::HandleExit(int) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x40d5586)
 #3 0x0000000004187f5e llvm::sys::Process::Exit(int, bool) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4187f5e)
 #4 0x0000000000db73a0 LLVMErrorHandler(void*, char const*, bool) cc1_main.cpp:0:0
 #5 0x00000000040e02e3 llvm::report_fatal_error(llvm::Twine const&, bool) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x40e02e3)
 #6 0x00000000040e0441 (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x40e0441)
 #7 0x0000000003b513e2 (anonymous namespace)::VerifierLegacyPass::runOnFunction(llvm::Function&) Verifier.cpp:0:0
 #8 0x0000000003a9a648 llvm::FPPassManager::runOnFunction(llvm::Function&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x3a9a648)
 #9 0x0000000003a9a881 llvm::FPPassManager::runOnModule(llvm::Module&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x3a9a881)
#10 0x0000000003a9b0ef llvm::legacy::PassManagerImpl::run(llvm::Module&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x3a9b0ef)
#11 0x000000000444a95e clang::emitBackendOutput(clang::CompilerInstance&, clang::CodeGenOptions&, llvm::StringRef, llvm::Module*, clang::BackendAction, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>, std::unique_ptr<llvm::raw_pwrite_stream, std::default_delete<llvm::raw_pwrite_stream>>, clang::BackendConsumer*) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x444a95e)
#12 0x0000000004ace6a7 clang::CodeGenAction::ExecuteAction() (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4ace6a7)
#13 0x0000000004db9485 clang::FrontendAction::Execute() (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4db9485)
#14 0x0000000004d3535e clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4d3535e)
#15 0x0000000004eaedcd clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4eaedcd)
#16 0x0000000000db9b00 cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0xdb9b00)
#17 0x0000000000db062a ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&, llvm::ToolContext const&, llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) driver.cpp:0:0
#18 0x0000000000db07ad int llvm::function_ref<int (llvm::SmallVectorImpl<char const*>&)>::callback_fn<clang_main(int, char**, llvm::ToolContext const&)::'lambda'(llvm::SmallVectorImpl<char const*>&)>(long, llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#19 0x0000000004b36e59 void llvm::function_ref<void ()>::callback_fn<clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const::'lambda'()>(long) Job.cpp:0:0
#20 0x00000000040d54c4 llvm::CrashRecoveryContext::RunSafely(llvm::function_ref<void ()>) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x40d54c4)
#21 0x0000000004b3746f clang::driver::CC1Command::Execute(llvm::ArrayRef<std::optional<llvm::StringRef>>, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>>*, bool*) const (.part.0) Job.cpp:0:0
#22 0x0000000004af88a2 clang::driver::Compilation::ExecuteCommand(clang::driver::Command const&, clang::driver::Command const*&, bool) const (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4af88a2)
#23 0x0000000004af984e clang::driver::Compilation::ExecuteJobs(clang::driver::JobList const&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&, bool) const (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4af984e)
#24 0x0000000004b00f75 clang::driver::Driver::ExecuteCompilation(clang::driver::Compilation&, llvm::SmallVectorImpl<std::pair<int, clang::driver::Command const*>>&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0x4b00f75)
#25 0x0000000000db6001 clang_main(int, char**, llvm::ToolContext const&) (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0xdb6001)
#26 0x0000000000c67d44 main (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0xc67d44)
#27 0x000077384da29d90 (/lib/x86_64-linux-gnu/libc.so.6+0x29d90)
#28 0x000077384da29e40 __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x29e40)
#29 0x0000000000db00c5 _start (/opt/compiler-explorer/clang-assertions-trunk/bin/clang+++0xdb00c5)

jhuber6 · 2025-09-22T13:29:58Z

Thanks for reporting. It seems your case causes it to think the alignment is 2^32, which truncates to 0 when set, and 0 isn't considered valid. I think infinite alignment shows up on null pointers, so I probably need to verify that and not update it.

Likely we should also just try to remove this alignment argument entirely and use the pointer argument itself.

Summary: The changes made in llvm#156057 allow the alignment value to be increased. We assert effectively infinite alignment when the pointer argument is invalid / null. The problem is that, for whatever reason, the masked load / store functions use i32 for their alignment value, which means this gets truncated to zero. Add a special check for this; long term we probably want to just remove this argument entirely.

Summary: The changes made in #156057 allow the alignment value to be increased. We assert effectively infinite alignment when the pointer argument is invalid / null. The problem is that, for whatever reason, the masked load / store functions use i32 for their alignment value, which means this gets truncated to zero. Add a special check for this; long term we probably want to just remove this argument entirely.

…32 (#160109) Summary: The changes made in llvm/llvm-project#156057 allow the alignment value to be increased. We assert effectively infinite alignment when the pointer argument is invalid / null. The problem is that, for whatever reason, the masked load / store functions use i32 for their alignment value, which means this gets truncated to zero. Add a special check for this; long term we probably want to just remove this argument entirely.

Summary: Right now these enforce alignment, which isn't convenient for the user on platforms that support unaligned accesses. The options are to either permit passing the alignment manually, or just assume it's unaligned unless the user specifies it. I've added llvm#156057 which should make the requested alignment show up on the intrinsic if the user passed `__builtin_assume_aligned`; however, that's only with optimizations. This shouldn't cause issues unless the backend categorically decides to reject an unaligned access.

…ointer (#156063) Summary: Right now these enforce alignment, which isn't convenient for the user on platforms that support unaligned accesses. The options are to either permit passing the alignment manually, or just assume it's unaligned unless the user specifies it. I've added #156057 which should make the requested alignment show up on the intrinsic if the user passed `__builtin_assume_aligned`; however, that's only with optimizations. This shouldn't cause issues unless the backend categorically decides to reject an unaligned access.

…ke scalar pointer (#156063) Summary: Right now these enforce alignment, which isn't convenient for the user on platforms that support unaligned accesses. The options are to either permit passing the alignment manually, or just assume it's unaligned unless the user specifies it. I've added llvm/llvm-project#156057 which should make the requested alignment show up on the intrinsic if the user passed `__builtin_assume_aligned`; however, that's only with optimizations. This shouldn't cause issues unless the backend categorically decides to reject an unaligned access.

…ointer (llvm#156063) Summary: Right now these enforce alignment, which isn't convenient for the user on platforms that support unaligned accesses. The options are to either permit passing the alignment manually, or just assume it's unaligned unless the user specifies it. I've added llvm#156057 which should make the requested alignment show up on the intrinsic if the user passed `__builtin_assume_aligned`; however, that's only with optimizations. This shouldn't cause issues unless the backend categorically decides to reject an unaligned access.

nikic · 2025-10-17T08:18:12Z

Got around to doing the alignment argument -> attribute change: #163802

jhuber6 requested a review from nikic as a code owner August 29, 2025 16:36

jhuber6 requested review from RKSimon, alexey-bataev, efriedma-quic and erichkeane August 29, 2025 16:36

llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Aug 29, 2025

jhuber6 requested a review from arsenm August 29, 2025 17:07

jhuber6 mentioned this pull request Aug 29, 2025

[Clang] Change masked load / store builtin interface to take scalar pointer #156063

Merged

nikic requested changes Aug 29, 2025

View reviewed changes

jhuber6 force-pushed the masked_align branch from 881b483 to 13c384c Compare August 29, 2025 19:14

jhuber6 changed the title ~~[InstCombine] Increase alignment in masked load / store intrinsics if known~~ [InferAlignment] Increase alignment in masked load / store intrinsics if known Aug 29, 2025

nikic reviewed Aug 30, 2025

View reviewed changes

llvm/lib/Transforms/Scalar/InferAlignment.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Scalar/InferAlignment.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Scalar/InferAlignment.cpp Outdated Show resolved Hide resolved

comments

a74177b

nikic approved these changes Aug 30, 2025

View reviewed changes

arsenm reviewed Aug 31, 2025

View reviewed changes

llvm/lib/Transforms/Scalar/InferAlignment.cpp Show resolved Hide resolved

llvm/lib/Transforms/Scalar/InferAlignment.cpp Outdated Show resolved Hide resolved

llvm/lib/Transforms/Scalar/InferAlignment.cpp Outdated Show resolved Hide resolved

comments

b07bc27

RKSimon approved these changes Aug 31, 2025

View reviewed changes

jhuber6 merged commit abda8be into llvm:main Sep 2, 2025
9 checks passed

jhuber6 mentioned this pull request Sep 22, 2025

[InferAlignment] Fix updating alignment when larger than i32 #160109

Merged

[InferAlignment] Increase alignment in masked load / store intrinsics if known #156057

[InferAlignment] Increase alignment in masked load / store intrinsics if known #156057

Uh oh!

Conversation

jhuber6 commented Aug 29, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Aug 29, 2025

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

nikic commented Aug 29, 2025

Uh oh!

jhuber6 commented Aug 29, 2025

Uh oh!

jhuber6 commented Aug 29, 2025

Uh oh!

Uh oh!

Uh oh!

Uh oh!

nikic commented Aug 30, 2025

Uh oh!

nikic left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Uh oh!

RKSimon left a comment

Choose a reason for hiding this comment

Uh oh!

RKSimon Aug 31, 2025

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Sep 2, 2025

Uh oh!

alexfh commented Sep 22, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

alexfh commented Sep 22, 2025

Uh oh!

jhuber6 commented Sep 22, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

nikic commented Oct 17, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

7 participants

jhuber6 commented Aug 29, 2025 •

edited

Loading

alexfh commented Sep 22, 2025 •

edited

Loading

jhuber6 commented Sep 22, 2025 •

edited

Loading