diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 47cc6ff7655ca..12440ab1e2615 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -5206,10 +5206,19 @@ void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
   Loop *L = LI.getLoopFor(CanonicalLoop->getHeader());
   if (AlignedVars.size()) {
     InsertPointTy IP = Builder.saveIP();
-    Builder.SetInsertPoint(CanonicalLoop->getPreheader()->getTerminator());
     for (auto &AlignedItem : AlignedVars) {
       Value *AlignedPtr = AlignedItem.first;
       Value *Alignment = AlignedItem.second;
+      // Place the assumption directly after the instruction that defines the
+      // pointer. A pointer that is not an instruction, or whose defining
+      // instruction has no successor in its block, falls back to the end of
+      // the loop preheader.
+      Instruction *InsertBefore = nullptr;
+      if (auto *PtrDef = dyn_cast<Instruction>(AlignedPtr))
+        InsertBefore = PtrDef->getNextNode();
+      if (!InsertBefore)
+        InsertBefore = CanonicalLoop->getPreheader()->getTerminator();
+      Builder.SetInsertPoint(InsertBefore);
       Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr,
                                         Alignment);
     }
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 19d80fbbd699b..8df144ad20be7 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -1872,6 +1872,27 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder,
 
   llvm::MapVector<llvm::Value *, llvm::Value *> alignedVars;
   llvm::omp::OrderKind order = convertOrderKind(simdOp.getOrder());
+  // For every aligned variable with an integer alignment, load its pointer
+  // just before the terminator of the block the builder currently targets and
+  // record the loaded value together with the alignment constant.
+  llvm::BasicBlock *sourceBlock = builder.GetInsertBlock();
+  std::optional<ArrayAttr> alignmentValues = simdOp.getAlignments();
+  mlir::OperandRange operands = simdOp.getAlignedVars();
+  for (size_t i = 0; i < operands.size(); ++i) {
+    llvm::Value *alignment = nullptr;
+    llvm::Value *llvmVal = moduleTranslation.lookupValue(operands[i]);
+    llvm::Type *ty = llvmVal->getType();
+    if (auto intAttr = llvm::dyn_cast<IntegerAttr>((*alignmentValues)[i])) {
+      alignment = builder.getInt64(intAttr.getInt());
+      assert(ty->isPointerTy() && "Invalid type for aligned variable");
+      assert(alignment && "Invalid alignment value");
+      auto curInsert = builder.saveIP();
+      builder.SetInsertPoint(sourceBlock->getTerminator());
+      llvmVal = builder.CreateLoad(ty, llvmVal);
+      builder.restoreIP(curInsert);
+      alignedVars[llvmVal] = alignment;
+    }
+  }
   ompBuilder->applySimd(loopInfo, alignedVars,
                         simdOp.getIfExpr()
                             ? moduleTranslation.lookupValue(simdOp.getIfExpr())
diff --git a/mlir/test/Target/LLVMIR/openmp-simd-aligned.mlir b/mlir/test/Target/LLVMIR/openmp-simd-aligned.mlir
new file mode 100644
index 0000000000000..f418742ef5dfa
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/openmp-simd-aligned.mlir
@@ -0,0 +1,63 @@
+// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s
+
+//CHECK-LABEL: define void @_QPsimd_aligned_pointer() {
+//CHECK:   %[[A_PTR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, i64 1, align 8
+//CHECK:   %[[A_VAL:.*]] = load ptr, ptr %[[A_PTR]], align 8
+//CHECK:   call void @llvm.assume(i1 true) [ "align"(ptr %[[A_VAL]], i64 256) ]
+llvm.func @_QPsimd_aligned_pointer() {
+  %1 = llvm.mlir.constant(1 : i64) : i64
+  %2 = llvm.alloca %1 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> {bindc_name = "x"} : (i64) -> !llvm.ptr
+  %3 = llvm.alloca %1 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i32) : i32
+  %5 = llvm.mlir.constant(10 : i32) : i32
+  %6 = llvm.mlir.constant(1 : i32) : i32
+  omp.simd aligned(%2 : !llvm.ptr -> 256 : i64) {
+    omp.loop_nest (%arg0) : i32 = (%4) to (%5) inclusive step (%6) {
+      llvm.store %arg0, %3 : i32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+//CHECK-LABEL: define void @_QPsimd_aligned_cptr() {
+//CHECK:   %[[A_CPTR:.*]] = alloca %_QM__fortran_builtinsT__builtin_c_ptr, i64 1, align 8
+//CHECK:   %[[A_VAL:.*]] = load ptr, ptr %[[A_CPTR]], align 8
+//CHECK:   call void @llvm.assume(i1 true) [ "align"(ptr %[[A_VAL]], i64 256) ]
+llvm.func @_QPsimd_aligned_cptr() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x !llvm.struct<"_QM__fortran_builtinsT__builtin_c_ptr", (i64)> {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i64) : i64
+  %3 = llvm.alloca %2 x i32 {bindc_name = "i", pinned} : (i64) -> !llvm.ptr
+  %4 = llvm.mlir.constant(1 : i32) : i32
+  %5 = llvm.mlir.constant(10 : i32) : i32
+  %6 = llvm.mlir.constant(1 : i32) : i32
+  omp.simd aligned(%1 : !llvm.ptr -> 256 : i64) {
+    omp.loop_nest (%arg0) : i32 = (%4) to (%5) inclusive step (%6) {
+      llvm.store %arg0, %3 : i32, !llvm.ptr
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}
+
+//CHECK-LABEL: define void @_QPsimd_aligned_allocatable() {
+//CHECK:   %[[A_ADDR:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
+//CHECK:   %[[A_VAL:.*]] = load ptr, ptr %[[A_ADDR]], align 8
+//CHECK:   call void @llvm.assume(i1 true) [ "align"(ptr %[[A_VAL]], i64 256) ]
+llvm.func @_QPsimd_aligned_allocatable() {
+  %0 = llvm.mlir.constant(1 : i64) : i64
+  %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {bindc_name = "a"} : (i64) -> !llvm.ptr
+  %2 = llvm.mlir.constant(1 : i32) : i32
+  %3 = llvm.mlir.constant(10 : i32) : i32
+  %4 = llvm.mlir.constant(1 : i32) : i32
+  omp.simd aligned(%1 : !llvm.ptr -> 256 : i64) {
+    omp.loop_nest (%arg0) : i32 = (%2) to (%3) inclusive step (%4) {
+      omp.yield
+    }
+    omp.terminator
+  }
+  llvm.return
+}