Skip to content

Conversation

@kaviya2510
Copy link
Contributor

Added translation support for the taskloop construct.

@kaviya2510 kaviya2510 marked this pull request as draft November 7, 2025 07:23
@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-mlir-openmp

@llvm/pr-subscribers-mlir-llvm

Author: Kaviya Rajendiran (kaviya2510)

Changes

Added translation support for the taskloop construct.


Patch is 32.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166903.diff

6 Files Affected:

  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+16)
  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+1)
  • (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+199)
  • (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+254-3)
  • (added) mlir/test/Target/LLVMIR/openmp-taskloop.mlir (+151)
  • (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+1-14)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index b3d7ab4acf303..18828380abd32 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1359,6 +1359,22 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// Generator for `#omp taskloop`
+  ///
+  /// \param Loc The location where the taskloop construct was encountered.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param LoopInfo Callback that returns the CanonicalLoopInfo (CLI).
+  /// \param LBVal Lower bound value of the loop.
+  /// \param UBVal Upper bound value of the loop.
+  /// \param StepVal Step value of the loop.
+  /// \param Tied True if the task is tied, false if the task is untied.
+  LLVM_ABI InsertPointOrErrorTy createTaskloop(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB,
+      llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
+      Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true);
+
   /// Generator for `#omp task`
   ///
   /// \param Loc The location where the task construct was encountered.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 46b3d53a4b408..032495dfe9d61 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -95,6 +95,7 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
 __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
 __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
 __OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
+__OMP_STRUCT_TYPE(Taskloop, kmp_task_info, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr, Int64, Int64, Int64)
 __OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
                   Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32, Int32)
 __OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fff9a815e5368..e88e722b1370e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1933,6 +1933,205 @@ static Value *emitTaskDependencies(
   return DepArray;
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    BodyGenCallbackTy BodyGenCB,
+    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> loopInfo,
+    Value *LBVal, Value *UBVal, Value *StepVal, bool Tied) {
+
+  if (!updateToLocation(Loc))
+    return InsertPointTy();
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+
+  BasicBlock *TaskloopExitBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.exit");
+  BasicBlock *TaskloopBodyBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.body");
+  BasicBlock *TaskloopAllocaBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.alloca");
+
+  InsertPointTy TaskloopAllocaIP =
+      InsertPointTy(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+  InsertPointTy TaskloopBodyIP =
+      InsertPointTy(TaskloopBodyBB, TaskloopBodyBB->begin());
+
+  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
+    return Err;
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> result = loopInfo();
+  if (!result) {
+    return result.takeError();
+  }
+
+  llvm::CanonicalLoopInfo *CLI = result.get();
+  OutlineInfo OI;
+  OI.EntryBB = TaskloopAllocaBB;
+  OI.OuterAllocaBB = AllocaIP.getBlock();
+  OI.ExitBB = TaskloopExitBB;
+
+  // Add the thread ID argument.
+  SmallVector<Instruction *, 4> ToBeDeleted;
+  // dummy instruction to be used as a fake argument
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));
+
+  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+                      TaskloopAllocaBB, CLI, Loc,
+                      ToBeDeleted](Function &OutlinedFn) mutable {
+    // Replace the stale call instruction with the appropriate RTL calls.
+    assert(OutlinedFn.hasOneUse() &&
+           "there must be a single user for the outlined function");
+    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+
+    // HasShareds is true if any variables are captured in the outlined region,
+    // false otherwise.
+    bool HasShareds = StaleCI->arg_size() > 1;
+    Builder.SetInsertPoint(StaleCI);
+
+    // Gather the arguments for emitting the runtime call for
+    // @__kmpc_omp_task_alloc
+    Function *TaskAllocFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+
+    Value *ThreadID = getOrCreateThreadID(Ident);
+
+    // Emit runtime call for @__kmpc_taskgroup
+    Function *TaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+    Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+
+    // The flags are set to 1 if the task is tied, 0 otherwise.
+    Value *Flags = Builder.getInt32(Tied);
+
+    Value *TaskSize = Builder.getInt64(
+        divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
+
+    Value *SharedsSize = Builder.getInt64(0);
+    if (HasShareds) {
+      AllocaInst *ArgStructAlloca =
+          dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+      assert(ArgStructAlloca &&
+             "Unable to find the alloca instruction corresponding to arguments "
+             "for extracted function");
+      StructType *ArgStructType =
+          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+      assert(ArgStructType && "Unable to find struct type corresponding to "
+                              "arguments for extracted function");
+      SharedsSize =
+          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+    }
+
+    // Emit the @__kmpc_omp_task_alloc runtime call
+    // The runtime call returns a pointer to an area where the task captured
+    // variables must be copied before the task is run (TaskData)
+    CallInst *TaskData = Builder.CreateCall(
+        TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+                      /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
+                      /*task_func=*/&OutlinedFn});
+
+    // Get pointers to the loop's lb, ub and step fields in the task struct
+    // and store the lower bound, upper bound and step values into them.
+    llvm::Value *lb =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5);
+    //  Value *LbVal_ext = Builder.CreateSExt(LBVal, Builder.getInt64Ty());
+    Builder.CreateStore(LBVal, lb);
+
+    llvm::Value *ub =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6);
+    Builder.CreateStore(UBVal, ub);
+
+    llvm::Value *step =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7);
+    Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty());
+    Builder.CreateStore(Step_ext, step);
+    llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step);
+
+    if (HasShareds) {
+      Value *Shareds = StaleCI->getArgOperand(1);
+      Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
+      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
+      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
+                           SharedsSize);
+    }
+
+    // Set up the arguments for the __kmpc_taskloop runtime call, using
+    // default values for ifval, nogroup, sched, grainsize and task_dup.
+    Value *IfVal = Builder.getInt32(1);
+    Value *NoGroup = Builder.getInt32(1);
+    Value *Sched = Builder.getInt32(0);
+    Value *GrainSize = Builder.getInt64(0);
+    Value *TaskDup = Constant::getNullValue(Builder.getPtrTy());
+
+    Value *Args[] = {Ident,    ThreadID, TaskData, IfVal,     lb,     ub,
+                     loadstep, NoGroup,  Sched,    GrainSize, TaskDup};
+
+    // taskloop runtime call
+    Function *TaskloopFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
+    Builder.CreateCall(TaskloopFn, Args);
+
+    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
+    Function *EndTaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+    Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+
+    StaleCI->eraseFromParent();
+
+    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+
+    if (HasShareds) {
+      LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
+      OutlinedFn.getArg(1)->replaceUsesWithIf(
+          Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
+    }
+
+    Value *IV = CLI->getIndVar();
+    Type *IVTy = IV->getType();
+    Constant *One = ConstantInt::get(IVTy, 1);
+
+    Value *task_lb = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 5, "gep_lb");
+    Value *LowerBound = Builder.CreateLoad(IVTy, task_lb, "lb");
+
+    Value *task_ub = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 6, "gep_ub");
+    Value *UpperBound = Builder.CreateLoad(IVTy, task_ub, "ub");
+
+    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+
+    Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound);
+    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
+    // set the trip count in the CLI
+    CLI->setTripCount(TripCount);
+
+    Builder.SetInsertPoint(CLI->getBody(),
+                           CLI->getBody()->getFirstInsertionPt());
+
+    llvm::BasicBlock *Body = CLI->getBody();
+    for (llvm::Instruction &I : *Body) {
+      if (auto *Add = llvm::dyn_cast<llvm::BinaryOperator>(&I)) {
+        if (Add->getOpcode() == llvm::Instruction::Add) {
+          if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) {
+            // update the starting index of the loop
+            Add->setOperand(1, LowerBound);
+          }
+        }
+      }
+    }
+
+    for (Instruction *I : llvm::reverse(ToBeDeleted)) {
+      I->eraseFromParent();
+    }
+  };
+
+  addOutlineInfo(std::move(OI));
+  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
+  return Builder.saveIP();
+}
+
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8edec990eaaba..d69fcd3db0413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -323,6 +323,18 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkFinal = [&todo](auto op, LogicalResult &result) {
+    if (op.getFinal())
+      result = todo("final");
+  };
+  auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
+    if (op.getGrainsize())
+      result = todo("grainsize");
+  };
+  auto checkIf = [](auto op, LogicalResult &) {
+    if (op.getIfExpr())
+      op.emitWarning("if");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -340,10 +352,22 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
       result = todo("linear");
   };
+  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
+    if (op.getMergeable())
+      result = todo("mergeable");
+  };
+  auto checkNogroup = [&todo](auto op, LogicalResult &result) {
+    if (op.getNogroup())
+      result = todo("nogroup");
+  };
   auto checkNowait = [&todo](auto op, LogicalResult &result) {
     if (op.getNowait())
       result = todo("nowait");
   };
+  auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
+    if (op.getNumTasks())
+      result = todo("num_tasks");
+  };
   auto checkOrder = [&todo](auto op, LogicalResult &result) {
     if (op.getOrder() || op.getOrderMod())
       result = todo("order");
@@ -417,7 +441,15 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkNowait(op, result);
       })
       .Case([&](omp::TaskloopOp op) {
-        // TODO: Add other clauses check
+        checkAllocate(op, result);
+        checkFinal(op, result);
+        checkGrainsize(op, result);
+        checkIf(op, result);
+        checkInReduction(op, result);
+        checkMergeable(op, result);
+        checkNogroup(op, result);
+        checkNumTasks(op, result);
+        checkReduction(op, result);
         checkUntied(op, result);
         checkPriority(op, result);
       })
@@ -2097,6 +2129,8 @@ class TaskContextStructManager {
   /// private decls.
   void createGEPsToPrivateVars();
 
+  llvm::Value *isAllocated();
+
   /// De-allocate the task context structure.
   void freeStructPtr();
 
@@ -2177,13 +2211,26 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
   }
 }
 
+llvm::Value *TaskContextStructManager::isAllocated() {
+  if (!structPtr)
+    return nullptr;
+
+  return builder.CreateIsNotNull(structPtr);
+}
+
 void TaskContextStructManager::freeStructPtr() {
   if (!structPtr)
     return;
 
   llvm::IRBuilderBase::InsertPointGuard guard{builder};
-  // Ensure we don't put the call to free() after the terminator
-  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  llvm::BasicBlock *currentBlock = builder.GetInsertBlock();
+  if (currentBlock->getTerminator()) {
+    // Ensure we don't put the call to free() after the terminator
+    builder.SetInsertPoint(currentBlock->getTerminator());
+  } else {
+    // Insert the call to free() at the end of the current block
+    builder.SetInsertPoint(currentBlock);
+  }
   builder.CreateFree(structPtr);
 }
 
@@ -2419,6 +2466,207 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
+// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto taskloopOp = cast<omp::TaskloopOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  // Stores pointers to the allocated firstprivate copies, which can be used
+  // later to free the allocated space.
+  SmallVector<llvm::Value *> llvmFirstPrivateVars;
+  PrivateVarsInfo privateVarsInfo(taskloopOp);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateVarsInfo.privatizers};
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+
+  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+  llvm::BasicBlock *taskloopStartBlock = llvm::BasicBlock::Create(
+      builder.getContext(), "omp.taskloop.start",
+      /*Parent=*/builder.GetInsertBlock()->getParent());
+  llvm::Instruction *branchToTaskloopStartBlock =
+      builder.CreateBr(taskloopStartBlock);
+  builder.SetInsertPoint(branchToTaskloopStartBlock);
+
+  llvm::BasicBlock *copyBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+  llvm::BasicBlock *initBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+      moduleTranslation, allocaIP);
+
+  // Allocate and initialize private variables
+  builder.SetInsertPoint(initBlock->getTerminator());
+
+  taskStructMgr.generateTaskContextStruct();
+  taskStructMgr.createGEPsToPrivateVars();
+
+  llvmFirstPrivateVars.resize(privateVarsInfo.blockArgs.size());
+  int index = 0;
+
+  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
+       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
+                       privateVarsInfo.blockArgs,
+                       taskStructMgr.getLLVMPrivateVarGEPs())) {
+    // To be handled inside the taskloop.
+    if (!privDecl.readsFromMold())
+      continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
+
+    llvm::Expected<llvm::Value *> privateVarOrErr =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (!privateVarOrErr)
+      return handleError(privateVarOrErr, *taskloopOp.getOperation());
+
+    llvmFirstPrivateVars[index++] = privateVarOrErr.get();
+
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+  }
+
+  // firstprivate copy region
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
+  if (failed(copyFirstPrivateVars(
+          taskloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
+          taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
+          taskloopOp.getPrivateNeedsBarrier())))
+    return llvm::failure();
+
+  // Set up insertion point for call to createTaskloop()
+  builder.SetInsertPoint(taskloopStartBlock);
+
+  auto bodyCB = [&](InsertPointTy allocaIP,
+                    InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // translate the body of the taskloop:
+    builder.restoreIP(codegenIP);
+
+    llvm::BasicBlock *privInitBlock = nullptr;
+    privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
+    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+             privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
+             privateVarsInfo.mlirVars))) {
+      auto [blockArg, privDecl, mlirPrivVar] = zip;
+      // This is handled before the task executes
+      if (privDecl.readsFromMold())
+        continue;
+
+      llvm::IRBuilderBase::InsertPointGuard guard(builder);
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (!privateVarOrError)
+        return privateVarOrError.takeError();
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      privateVarsInfo.llvmVars[i] = privateVarOrError.get();
+      // Add the private variable to llvmFirstPrivateVars.
+      llvmFirstPrivateVars[index++] = privateVarOrError.get();
+    }
+
+    taskStructMgr.createGEPsToPrivateVars();
+    for (auto [i, llvmPrivVar] :
+         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
+      if (!llvmPrivVar) {
...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-flang-openmp

Author: Kaviya Rajendiran (kaviya2510)

Changes

Added translation support for the taskloop construct.


Patch is 32.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166903.diff

6 Files Affected:

  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+16)
  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+1)
  • (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+199)
  • (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+254-3)
  • (added) mlir/test/Target/LLVMIR/openmp-taskloop.mlir (+151)
  • (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+1-14)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index b3d7ab4acf303..18828380abd32 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1359,6 +1359,22 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// Generator for `#omp taskloop`
+  ///
+  /// \param Loc The location where the taskloop construct was encountered.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param LoopInfo Callback that returns the CanonicalLoopInfo (CLI).
+  /// \param LBVal Lower bound value of the loop.
+  /// \param UBVal Upper bound value of the loop.
+  /// \param StepVal Step value of the loop.
+  /// \param Tied True if the task is tied, false if the task is untied.
+  LLVM_ABI InsertPointOrErrorTy createTaskloop(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB,
+      llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
+      Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true);
+
   /// Generator for `#omp task`
   ///
   /// \param Loc The location where the task construct was encountered.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 46b3d53a4b408..032495dfe9d61 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -95,6 +95,7 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
 __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
 __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
 __OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
+__OMP_STRUCT_TYPE(Taskloop, kmp_task_info, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr, Int64, Int64, Int64)
 __OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
                   Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32, Int32)
 __OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fff9a815e5368..e88e722b1370e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1933,6 +1933,205 @@ static Value *emitTaskDependencies(
   return DepArray;
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    BodyGenCallbackTy BodyGenCB,
+    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> loopInfo,
+    Value *LBVal, Value *UBVal, Value *StepVal, bool Tied) {
+
+  if (!updateToLocation(Loc))
+    return InsertPointTy();
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+
+  BasicBlock *TaskloopExitBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.exit");
+  BasicBlock *TaskloopBodyBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.body");
+  BasicBlock *TaskloopAllocaBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.alloca");
+
+  InsertPointTy TaskloopAllocaIP =
+      InsertPointTy(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+  InsertPointTy TaskloopBodyIP =
+      InsertPointTy(TaskloopBodyBB, TaskloopBodyBB->begin());
+
+  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
+    return Err;
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> result = loopInfo();
+  if (!result) {
+    return result.takeError();
+  }
+
+  llvm::CanonicalLoopInfo *CLI = result.get();
+  OutlineInfo OI;
+  OI.EntryBB = TaskloopAllocaBB;
+  OI.OuterAllocaBB = AllocaIP.getBlock();
+  OI.ExitBB = TaskloopExitBB;
+
+  // Add the thread ID argument.
+  SmallVector<Instruction *, 4> ToBeDeleted;
+  // dummy instruction to be used as a fake argument
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));
+
+  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+                      TaskloopAllocaBB, CLI, Loc,
+                      ToBeDeleted](Function &OutlinedFn) mutable {
+    // Replace the stale call instruction with the appropriate RTL calls.
+    assert(OutlinedFn.hasOneUse() &&
+           "there must be a single user for the outlined function");
+    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+
+    // HasShareds is true if any variables are captured in the outlined region,
+    // false otherwise.
+    bool HasShareds = StaleCI->arg_size() > 1;
+    Builder.SetInsertPoint(StaleCI);
+
+    // Gather the arguments for emitting the runtime call for
+    // @__kmpc_omp_task_alloc
+    Function *TaskAllocFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+
+    Value *ThreadID = getOrCreateThreadID(Ident);
+
+    // Emit runtime call for @__kmpc_taskgroup
+    Function *TaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+    Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+
+    // The flags are set to 1 if the task is tied, 0 otherwise.
+    Value *Flags = Builder.getInt32(Tied);
+
+    Value *TaskSize = Builder.getInt64(
+        divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
+
+    Value *SharedsSize = Builder.getInt64(0);
+    if (HasShareds) {
+      AllocaInst *ArgStructAlloca =
+          dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+      assert(ArgStructAlloca &&
+             "Unable to find the alloca instruction corresponding to arguments "
+             "for extracted function");
+      StructType *ArgStructType =
+          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+      assert(ArgStructType && "Unable to find struct type corresponding to "
+                              "arguments for extracted function");
+      SharedsSize =
+          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+    }
+
+    // Emit the @__kmpc_omp_task_alloc runtime call
+    // The runtime call returns a pointer to an area where the task captured
+    // variables must be copied before the task is run (TaskData)
+    CallInst *TaskData = Builder.CreateCall(
+        TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+                      /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
+                      /*task_func=*/&OutlinedFn});
+
+    // Get pointers to the loop's lb, ub and step fields in the task struct
+    // and store the lower bound, upper bound and step values into them.
+    llvm::Value *lb =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5);
+    //  Value *LbVal_ext = Builder.CreateSExt(LBVal, Builder.getInt64Ty());
+    Builder.CreateStore(LBVal, lb);
+
+    llvm::Value *ub =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6);
+    Builder.CreateStore(UBVal, ub);
+
+    llvm::Value *step =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7);
+    Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty());
+    Builder.CreateStore(Step_ext, step);
+    llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step);
+
+    if (HasShareds) {
+      Value *Shareds = StaleCI->getArgOperand(1);
+      Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
+      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
+      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
+                           SharedsSize);
+    }
+
+    // Set up the arguments for the __kmpc_taskloop runtime call, using
+    // default values for ifval, nogroup, sched, grainsize and task_dup.
+    Value *IfVal = Builder.getInt32(1);
+    Value *NoGroup = Builder.getInt32(1);
+    Value *Sched = Builder.getInt32(0);
+    Value *GrainSize = Builder.getInt64(0);
+    Value *TaskDup = Constant::getNullValue(Builder.getPtrTy());
+
+    Value *Args[] = {Ident,    ThreadID, TaskData, IfVal,     lb,     ub,
+                     loadstep, NoGroup,  Sched,    GrainSize, TaskDup};
+
+    // taskloop runtime call
+    Function *TaskloopFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
+    Builder.CreateCall(TaskloopFn, Args);
+
+    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
+    Function *EndTaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+    Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+
+    StaleCI->eraseFromParent();
+
+    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+
+    if (HasShareds) {
+      LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
+      OutlinedFn.getArg(1)->replaceUsesWithIf(
+          Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
+    }
+
+    Value *IV = CLI->getIndVar();
+    Type *IVTy = IV->getType();
+    Constant *One = ConstantInt::get(IVTy, 1);
+
+    Value *task_lb = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 5, "gep_lb");
+    Value *LowerBound = Builder.CreateLoad(IVTy, task_lb, "lb");
+
+    Value *task_ub = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 6, "gep_ub");
+    Value *UpperBound = Builder.CreateLoad(IVTy, task_ub, "ub");
+
+    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+
+    Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound);
+    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
+    // set the trip count in the CLI
+    CLI->setTripCount(TripCount);
+
+    Builder.SetInsertPoint(CLI->getBody(),
+                           CLI->getBody()->getFirstInsertionPt());
+
+    llvm::BasicBlock *Body = CLI->getBody();
+    for (llvm::Instruction &I : *Body) {
+      if (auto *Add = llvm::dyn_cast<llvm::BinaryOperator>(&I)) {
+        if (Add->getOpcode() == llvm::Instruction::Add) {
+          if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) {
+            // update the starting index of the loop
+            Add->setOperand(1, LowerBound);
+          }
+        }
+      }
+    }
+
+    for (Instruction *I : llvm::reverse(ToBeDeleted)) {
+      I->eraseFromParent();
+    }
+  };
+
+  addOutlineInfo(std::move(OI));
+  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
+  return Builder.saveIP();
+}
+
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8edec990eaaba..d69fcd3db0413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -323,6 +323,18 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkFinal = [&todo](auto op, LogicalResult &result) {
+    if (op.getFinal())
+      result = todo("final");
+  };
+  auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
+    if (op.getGrainsize())
+      result = todo("grainsize");
+  };
+  auto checkIf = [](auto op, LogicalResult &) {
+    if (op.getIfExpr())
+      op.emitWarning("if");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -340,10 +352,22 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
       result = todo("linear");
   };
+  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
+    if (op.getMergeable())
+      result = todo("mergeable");
+  };
+  auto checkNogroup = [&todo](auto op, LogicalResult &result) {
+    if (op.getNogroup())
+      result = todo("nogroup");
+  };
   auto checkNowait = [&todo](auto op, LogicalResult &result) {
     if (op.getNowait())
       result = todo("nowait");
   };
+  auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
+    if (op.getNumTasks())
+      result = todo("num_tasks");
+  };
   auto checkOrder = [&todo](auto op, LogicalResult &result) {
     if (op.getOrder() || op.getOrderMod())
       result = todo("order");
@@ -417,7 +441,15 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkNowait(op, result);
       })
       .Case([&](omp::TaskloopOp op) {
-        // TODO: Add other clauses check
+        checkAllocate(op, result);
+        checkFinal(op, result);
+        checkGrainsize(op, result);
+        checkIf(op, result);
+        checkInReduction(op, result);
+        checkMergeable(op, result);
+        checkNogroup(op, result);
+        checkNumTasks(op, result);
+        checkReduction(op, result);
         checkUntied(op, result);
         checkPriority(op, result);
       })
@@ -2097,6 +2129,8 @@ class TaskContextStructManager {
   /// private decls.
   void createGEPsToPrivateVars();
 
+  llvm::Value *isAllocated();
+
   /// De-allocate the task context structure.
   void freeStructPtr();
 
@@ -2177,13 +2211,26 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
   }
 }
 
+llvm::Value *TaskContextStructManager::isAllocated() {
+  if (!structPtr)
+    return nullptr;
+
+  return builder.CreateIsNotNull(structPtr);
+}
+
 void TaskContextStructManager::freeStructPtr() {
   if (!structPtr)
     return;
 
   llvm::IRBuilderBase::InsertPointGuard guard{builder};
-  // Ensure we don't put the call to free() after the terminator
-  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  llvm::BasicBlock *currentBlock = builder.GetInsertBlock();
+  if (currentBlock->getTerminator()) {
+    // Ensure we don't put the call to free() after the terminator
+    builder.SetInsertPoint(currentBlock->getTerminator());
+  } else {
+    // Insert the call to free() at the end of the current block
+    builder.SetInsertPoint(currentBlock);
+  }
   builder.CreateFree(structPtr);
 }
 
@@ -2419,6 +2466,207 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
+// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto taskloopOp = cast<omp::TaskloopOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  // Stores the pointers to the allocated firstprivate copies,
+  // which can be used later for freeing the allocated space.
+  SmallVector<llvm::Value *> llvmFirstPrivateVars;
+  PrivateVarsInfo privateVarsInfo(taskloopOp);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateVarsInfo.privatizers};
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+
+  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+  llvm::BasicBlock *taskloopStartBlock = llvm::BasicBlock::Create(
+      builder.getContext(), "omp.taskloop.start",
+      /*Parent=*/builder.GetInsertBlock()->getParent());
+  llvm::Instruction *branchToTaskloopStartBlock =
+      builder.CreateBr(taskloopStartBlock);
+  builder.SetInsertPoint(branchToTaskloopStartBlock);
+
+  llvm::BasicBlock *copyBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+  llvm::BasicBlock *initBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+      moduleTranslation, allocaIP);
+
+  // Allocate and initialize private variables
+  builder.SetInsertPoint(initBlock->getTerminator());
+
+  taskStructMgr.generateTaskContextStruct();
+  taskStructMgr.createGEPsToPrivateVars();
+
+  llvmFirstPrivateVars.resize(privateVarsInfo.blockArgs.size());
+  int index = 0;
+
+  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
+       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
+                       privateVarsInfo.blockArgs,
+                       taskStructMgr.getLLVMPrivateVarGEPs())) {
+    // To be handled inside the taskloop.
+    if (!privDecl.readsFromMold())
+      continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
+
+    llvm::Expected<llvm::Value *> privateVarOrErr =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (!privateVarOrErr)
+      return handleError(privateVarOrErr, *taskloopOp.getOperation());
+
+    llvmFirstPrivateVars[index++] = privateVarOrErr.get();
+
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+  }
+
+  // firstprivate copy region
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
+  if (failed(copyFirstPrivateVars(
+          taskloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
+          taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
+          taskloopOp.getPrivateNeedsBarrier())))
+    return llvm::failure();
+
+  // Set up insertion point for call to createTaskloop()
+  builder.SetInsertPoint(taskloopStartBlock);
+
+  auto bodyCB = [&](InsertPointTy allocaIP,
+                    InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // translate the body of the taskloop:
+    builder.restoreIP(codegenIP);
+
+    llvm::BasicBlock *privInitBlock = nullptr;
+    privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
+    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+             privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
+             privateVarsInfo.mlirVars))) {
+      auto [blockArg, privDecl, mlirPrivVar] = zip;
+      // This is handled before the task executes
+      if (privDecl.readsFromMold())
+        continue;
+
+      llvm::IRBuilderBase::InsertPointGuard guard(builder);
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (!privateVarOrError)
+        return privateVarOrError.takeError();
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      privateVarsInfo.llvmVars[i] = privateVarOrError.get();
+      // Add the private variable to llvmFirstPrivateVars
+      llvmFirstPrivateVars[index++] = privateVarOrError.get();
+    }
+
+    taskStructMgr.createGEPsToPrivateVars();
+    for (auto [i, llvmPrivVar] :
+         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
+      if (!llvmPrivVar) {
...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-mlir

Author: Kaviya Rajendiran (kaviya2510)

Changes

Added translation support for the taskloop construct.


Patch is 32.13 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166903.diff

6 Files Affected:

  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+16)
  • (modified) llvm/include/llvm/Frontend/OpenMP/OMPKinds.def (+1)
  • (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+199)
  • (modified) mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp (+254-3)
  • (added) mlir/test/Target/LLVMIR/openmp-taskloop.mlir (+151)
  • (modified) mlir/test/Target/LLVMIR/openmp-todo.mlir (+1-14)
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index b3d7ab4acf303..18828380abd32 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -1359,6 +1359,22 @@ class OpenMPIRBuilder {
         : DepKind(DepKind), DepValueType(DepValueType), DepVal(DepVal) {}
   };
 
+  /// Generator for `#omp taskloop`
+  ///
+  /// \param Loc The location where the taskloop construct was encountered.
+  /// \param AllocaIP The insertion point to be used for alloca instructions.
+  /// \param BodyGenCB Callback that will generate the region code.
+  /// \param LoopInfo Callback that returns the CanonicalLoopInfo (CLI).
+  /// \param LBVal Lower bound value of the loop.
+  /// \param UBVal Upper bound value of the loop.
+  /// \param StepVal Step value of the loop.
+  /// \param Tied True if the task is tied, false if the task is untied.
+  LLVM_ABI InsertPointOrErrorTy createTaskloop(
+      const LocationDescription &Loc, InsertPointTy AllocaIP,
+      BodyGenCallbackTy BodyGenCB,
+      llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
+      Value *LBVal, Value *UBVal, Value *StepVal, bool Tied = true);
+
   /// Generator for `#omp task`
   ///
   /// \param Loc The location where the task construct was encountered.
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index 46b3d53a4b408..032495dfe9d61 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -95,6 +95,7 @@ __OMP_STRUCT_TYPE(KernelArgs, __tgt_kernel_arguments, false, Int32, Int32, VoidP
 __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr)
 __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8)
 __OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr)
+__OMP_STRUCT_TYPE(Taskloop, kmp_task_info, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr, Int64, Int64, Int64)
 __OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false,
                   Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32, Int32)
 __OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index fff9a815e5368..e88e722b1370e 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -1933,6 +1933,205 @@ static Value *emitTaskDependencies(
   return DepArray;
 }
 
+OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTaskloop(
+    const LocationDescription &Loc, InsertPointTy AllocaIP,
+    BodyGenCallbackTy BodyGenCB,
+    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> loopInfo,
+    Value *LBVal, Value *UBVal, Value *StepVal, bool Tied) {
+
+  if (!updateToLocation(Loc))
+    return InsertPointTy();
+
+  uint32_t SrcLocStrSize;
+  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc, SrcLocStrSize);
+  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
+
+  BasicBlock *TaskloopExitBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.exit");
+  BasicBlock *TaskloopBodyBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.body");
+  BasicBlock *TaskloopAllocaBB =
+      splitBB(Builder, /*CreateBranch=*/true, "taskloop.alloca");
+
+  InsertPointTy TaskloopAllocaIP =
+      InsertPointTy(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+  InsertPointTy TaskloopBodyIP =
+      InsertPointTy(TaskloopBodyBB, TaskloopBodyBB->begin());
+
+  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
+    return Err;
+
+  llvm::Expected<llvm::CanonicalLoopInfo *> result = loopInfo();
+  if (!result) {
+    return result.takeError();
+  }
+
+  llvm::CanonicalLoopInfo *CLI = result.get();
+  OutlineInfo OI;
+  OI.EntryBB = TaskloopAllocaBB;
+  OI.OuterAllocaBB = AllocaIP.getBlock();
+  OI.ExitBB = TaskloopExitBB;
+
+  // Add the thread ID argument.
+  SmallVector<Instruction *, 4> ToBeDeleted;
+  // dummy instruction to be used as a fake argument
+  OI.ExcludeArgsFromAggregate.push_back(createFakeIntVal(
+      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));
+
+  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Tied,
+                      TaskloopAllocaBB, CLI, Loc,
+                      ToBeDeleted](Function &OutlinedFn) mutable {
+    // Replace the stale CI with the appropriate RTL function call.
+    assert(OutlinedFn.hasOneUse() &&
+           "there must be a single user for the outlined function");
+    CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
+
+    // HasShareds is true if any variables are captured in the outlined region,
+    // false otherwise.
+    bool HasShareds = StaleCI->arg_size() > 1;
+    Builder.SetInsertPoint(StaleCI);
+
+    // Gather the arguments for emitting the runtime call for
+    // @__kmpc_omp_task_alloc
+    Function *TaskAllocFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
+
+    Value *ThreadID = getOrCreateThreadID(Ident);
+
+    // Emit runtime call for @__kmpc_taskgroup
+    Function *TaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
+    Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
+
+    // The flags are set to 1 if the task is tied, 0 otherwise.
+    Value *Flags = Builder.getInt32(Tied);
+
+    Value *TaskSize = Builder.getInt64(
+        divideCeil(M.getDataLayout().getTypeSizeInBits(Taskloop), 8));
+
+    Value *SharedsSize = Builder.getInt64(0);
+    if (HasShareds) {
+      AllocaInst *ArgStructAlloca =
+          dyn_cast<AllocaInst>(StaleCI->getArgOperand(1));
+      assert(ArgStructAlloca &&
+             "Unable to find the alloca instruction corresponding to arguments "
+             "for extracted function");
+      StructType *ArgStructType =
+          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
+      assert(ArgStructType && "Unable to find struct type corresponding to "
+                              "arguments for extracted function");
+      SharedsSize =
+          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
+    }
+
+    // Emit the @__kmpc_omp_task_alloc runtime call
+    // The runtime call returns a pointer to an area where the task captured
+    // variables must be copied before the task is run (TaskData)
+    CallInst *TaskData = Builder.CreateCall(
+        TaskAllocFn, {/*loc_ref=*/Ident, /*gtid=*/ThreadID, /*flags=*/Flags,
+                      /*sizeof_task=*/TaskSize, /*sizeof_shared=*/SharedsSize,
+                      /*task_func=*/&OutlinedFn});
+
+    // Get the pointers to the loop lb, ub and step fields from the task ptr
+    // and store the lower bound, upper bound and step values into them.
+    llvm::Value *lb =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 5);
+    //  Value *LbVal_ext = Builder.CreateSExt(LBVal, Builder.getInt64Ty());
+    Builder.CreateStore(LBVal, lb);
+
+    llvm::Value *ub =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 6);
+    Builder.CreateStore(UBVal, ub);
+
+    llvm::Value *step =
+        Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop, TaskData, 7);
+    Value *Step_ext = Builder.CreateSExt(StepVal, Builder.getInt64Ty());
+    Builder.CreateStore(Step_ext, step);
+    llvm::Value *loadstep = Builder.CreateLoad(Builder.getInt64Ty(), step);
+
+    if (HasShareds) {
+      Value *Shareds = StaleCI->getArgOperand(1);
+      Align Alignment = TaskData->getPointerAlignment(M.getDataLayout());
+      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
+      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
+                           SharedsSize);
+    }
+
+    // Set up the arguments for emitting the __kmpc_taskloop runtime call,
+    // using default values for ifval, nogroup, sched, grainsize and task_dup.
+    Value *IfVal = Builder.getInt32(1);
+    Value *NoGroup = Builder.getInt32(1);
+    Value *Sched = Builder.getInt32(0);
+    Value *GrainSize = Builder.getInt64(0);
+    Value *TaskDup = Constant::getNullValue(Builder.getPtrTy());
+
+    Value *Args[] = {Ident,    ThreadID, TaskData, IfVal,     lb,     ub,
+                     loadstep, NoGroup,  Sched,    GrainSize, TaskDup};
+
+    // taskloop runtime call
+    Function *TaskloopFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskloop);
+    Builder.CreateCall(TaskloopFn, Args);
+
+    // Emit the @__kmpc_end_taskgroup runtime call to end the taskgroup
+    Function *EndTaskgroupFn =
+        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
+    Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
+
+    StaleCI->eraseFromParent();
+
+    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
+
+    if (HasShareds) {
+      LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
+      OutlinedFn.getArg(1)->replaceUsesWithIf(
+          Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });
+    }
+
+    Value *IV = CLI->getIndVar();
+    Type *IVTy = IV->getType();
+    Constant *One = ConstantInt::get(IVTy, 1);
+
+    Value *task_lb = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 5, "gep_lb");
+    Value *LowerBound = Builder.CreateLoad(IVTy, task_lb, "lb");
+
+    Value *task_ub = Builder.CreateStructGEP(OpenMPIRBuilder::Taskloop,
+                                             OutlinedFn.getArg(1), 6, "gep_ub");
+    Value *UpperBound = Builder.CreateLoad(IVTy, task_ub, "ub");
+
+    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
+
+    Value *TripCountMinusOne = Builder.CreateSub(UpperBound, LowerBound);
+    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
+    // set the trip count in the CLI
+    CLI->setTripCount(TripCount);
+
+    Builder.SetInsertPoint(CLI->getBody(),
+                           CLI->getBody()->getFirstInsertionPt());
+
+    llvm::BasicBlock *Body = CLI->getBody();
+    for (llvm::Instruction &I : *Body) {
+      if (auto *Add = llvm::dyn_cast<llvm::BinaryOperator>(&I)) {
+        if (Add->getOpcode() == llvm::Instruction::Add) {
+          if (llvm::isa<llvm::BinaryOperator>(Add->getOperand(0))) {
+            // update the starting index of the loop
+            Add->setOperand(1, LowerBound);
+          }
+        }
+      }
+    }
+
+    for (Instruction *I : llvm::reverse(ToBeDeleted)) {
+      I->eraseFromParent();
+    }
+  };
+
+  addOutlineInfo(std::move(OI));
+  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
+  return Builder.saveIP();
+}
+
 OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
     const LocationDescription &Loc, InsertPointTy AllocaIP,
     BodyGenCallbackTy BodyGenCB, bool Tied, Value *Final, Value *IfCondition,
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 8edec990eaaba..d69fcd3db0413 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -323,6 +323,18 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (op.getDistScheduleChunkSize())
       result = todo("dist_schedule with chunk_size");
   };
+  auto checkFinal = [&todo](auto op, LogicalResult &result) {
+    if (op.getFinal())
+      result = todo("final");
+  };
+  auto checkGrainsize = [&todo](auto op, LogicalResult &result) {
+    if (op.getGrainsize())
+      result = todo("grainsize");
+  };
+  auto checkIf = [](auto op, LogicalResult &) {
+    if (op.getIfExpr())
+      op.emitWarning("if");
+  };
   auto checkHint = [](auto op, LogicalResult &) {
     if (op.getHint())
       op.emitWarning("hint clause discarded");
@@ -340,10 +352,22 @@ static LogicalResult checkImplementationStatus(Operation &op) {
     if (!op.getLinearVars().empty() || !op.getLinearStepVars().empty())
       result = todo("linear");
   };
+  auto checkMergeable = [&todo](auto op, LogicalResult &result) {
+    if (op.getMergeable())
+      result = todo("mergeable");
+  };
+  auto checkNogroup = [&todo](auto op, LogicalResult &result) {
+    if (op.getNogroup())
+      result = todo("nogroup");
+  };
   auto checkNowait = [&todo](auto op, LogicalResult &result) {
     if (op.getNowait())
       result = todo("nowait");
   };
+  auto checkNumTasks = [&todo](auto op, LogicalResult &result) {
+    if (op.getNumTasks())
+      result = todo("num_tasks");
+  };
   auto checkOrder = [&todo](auto op, LogicalResult &result) {
     if (op.getOrder() || op.getOrderMod())
       result = todo("order");
@@ -417,7 +441,15 @@ static LogicalResult checkImplementationStatus(Operation &op) {
         checkNowait(op, result);
       })
       .Case([&](omp::TaskloopOp op) {
-        // TODO: Add other clauses check
+        checkAllocate(op, result);
+        checkFinal(op, result);
+        checkGrainsize(op, result);
+        checkIf(op, result);
+        checkInReduction(op, result);
+        checkMergeable(op, result);
+        checkNogroup(op, result);
+        checkNumTasks(op, result);
+        checkReduction(op, result);
         checkUntied(op, result);
         checkPriority(op, result);
       })
@@ -2097,6 +2129,8 @@ class TaskContextStructManager {
   /// private decls.
   void createGEPsToPrivateVars();
 
+  llvm::Value *isAllocated();
+
   /// De-allocate the task context structure.
   void freeStructPtr();
 
@@ -2177,13 +2211,26 @@ void TaskContextStructManager::createGEPsToPrivateVars() {
   }
 }
 
+llvm::Value *TaskContextStructManager::isAllocated() {
+  if (!structPtr)
+    return nullptr;
+
+  return builder.CreateIsNotNull(structPtr);
+}
+
 void TaskContextStructManager::freeStructPtr() {
   if (!structPtr)
     return;
 
   llvm::IRBuilderBase::InsertPointGuard guard{builder};
-  // Ensure we don't put the call to free() after the terminator
-  builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+  llvm::BasicBlock *currentBlock = builder.GetInsertBlock();
+  if (currentBlock->getTerminator()) {
+    // Ensure we don't put the call to free() after the terminator
+    builder.SetInsertPoint(currentBlock->getTerminator());
+  } else {
+    // Insert the call to free() at the end of the current block
+    builder.SetInsertPoint(currentBlock);
+  }
   builder.CreateFree(structPtr);
 }
 
@@ -2419,6 +2466,207 @@ convertOmpTaskOp(omp::TaskOp taskOp, llvm::IRBuilderBase &builder,
   return success();
 }
 
+// Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder.
+static LogicalResult
+convertOmpTaskloopOp(Operation &opInst, llvm::IRBuilderBase &builder,
+                     LLVM::ModuleTranslation &moduleTranslation) {
+  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
+  auto taskloopOp = cast<omp::TaskloopOp>(opInst);
+  if (failed(checkImplementationStatus(opInst)))
+    return failure();
+
+  // Stores the pointers to the allocated firstprivate copies,
+  // which can be used later for freeing the allocated space.
+  SmallVector<llvm::Value *> llvmFirstPrivateVars;
+  PrivateVarsInfo privateVarsInfo(taskloopOp);
+  TaskContextStructManager taskStructMgr{builder, moduleTranslation,
+                                         privateVarsInfo.privatizers};
+
+  llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
+      findAllocaInsertPoint(builder, moduleTranslation);
+
+  assert(builder.GetInsertPoint() == builder.GetInsertBlock()->end());
+  llvm::BasicBlock *taskloopStartBlock = llvm::BasicBlock::Create(
+      builder.getContext(), "omp.taskloop.start",
+      /*Parent=*/builder.GetInsertBlock()->getParent());
+  llvm::Instruction *branchToTaskloopStartBlock =
+      builder.CreateBr(taskloopStartBlock);
+  builder.SetInsertPoint(branchToTaskloopStartBlock);
+
+  llvm::BasicBlock *copyBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.copy");
+  llvm::BasicBlock *initBlock =
+      splitBB(builder, /*CreateBranch=*/true, "omp.private.init");
+
+  LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+      moduleTranslation, allocaIP);
+
+  // Allocate and initialize private variables
+  builder.SetInsertPoint(initBlock->getTerminator());
+
+  taskStructMgr.generateTaskContextStruct();
+  taskStructMgr.createGEPsToPrivateVars();
+
+  llvmFirstPrivateVars.resize(privateVarsInfo.blockArgs.size());
+  int index = 0;
+
+  for (auto [privDecl, mlirPrivVar, blockArg, llvmPrivateVarAlloc] :
+       llvm::zip_equal(privateVarsInfo.privatizers, privateVarsInfo.mlirVars,
+                       privateVarsInfo.blockArgs,
+                       taskStructMgr.getLLVMPrivateVarGEPs())) {
+    // To be handled inside the taskloop.
+    if (!privDecl.readsFromMold())
+      continue;
+    assert(llvmPrivateVarAlloc &&
+           "reads from mold so shouldn't have been skipped");
+
+    llvm::Expected<llvm::Value *> privateVarOrErr =
+        initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                       blockArg, llvmPrivateVarAlloc, initBlock);
+    if (!privateVarOrErr)
+      return handleError(privateVarOrErr, *taskloopOp.getOperation());
+
+    llvmFirstPrivateVars[index++] = privateVarOrErr.get();
+
+    llvm::IRBuilderBase::InsertPointGuard guard(builder);
+    builder.SetInsertPoint(builder.GetInsertBlock()->getTerminator());
+
+    if ((privateVarOrErr.get() != llvmPrivateVarAlloc) &&
+        !mlir::isa<LLVM::LLVMPointerType>(blockArg.getType())) {
+      builder.CreateStore(privateVarOrErr.get(), llvmPrivateVarAlloc);
+      // Load it so we have the value pointed to by the GEP
+      llvmPrivateVarAlloc = builder.CreateLoad(privateVarOrErr.get()->getType(),
+                                               llvmPrivateVarAlloc);
+    }
+    assert(llvmPrivateVarAlloc->getType() ==
+           moduleTranslation.convertType(blockArg.getType()));
+  }
+
+  // firstprivate copy region
+  setInsertPointForPossiblyEmptyBlock(builder, copyBlock);
+  if (failed(copyFirstPrivateVars(
+          taskloopOp, builder, moduleTranslation, privateVarsInfo.mlirVars,
+          taskStructMgr.getLLVMPrivateVarGEPs(), privateVarsInfo.privatizers,
+          taskloopOp.getPrivateNeedsBarrier())))
+    return llvm::failure();
+
+  // Set up insertion point for call to createTaskloop()
+  builder.SetInsertPoint(taskloopStartBlock);
+
+  auto bodyCB = [&](InsertPointTy allocaIP,
+                    InsertPointTy codegenIP) -> llvm::Error {
+    // Save the alloca insertion point on ModuleTranslation stack for use in
+    // nested regions.
+    LLVM::ModuleTranslation::SaveStack<OpenMPAllocaStackFrame> frame(
+        moduleTranslation, allocaIP);
+
+    // translate the body of the taskloop:
+    builder.restoreIP(codegenIP);
+
+    llvm::BasicBlock *privInitBlock = nullptr;
+    privateVarsInfo.llvmVars.resize(privateVarsInfo.blockArgs.size());
+    for (auto [i, zip] : llvm::enumerate(llvm::zip_equal(
+             privateVarsInfo.blockArgs, privateVarsInfo.privatizers,
+             privateVarsInfo.mlirVars))) {
+      auto [blockArg, privDecl, mlirPrivVar] = zip;
+      // This is handled before the task executes
+      if (privDecl.readsFromMold())
+        continue;
+
+      llvm::IRBuilderBase::InsertPointGuard guard(builder);
+      llvm::Type *llvmAllocType =
+          moduleTranslation.convertType(privDecl.getType());
+      builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
+      llvm::Value *llvmPrivateVar = builder.CreateAlloca(
+          llvmAllocType, /*ArraySize=*/nullptr, "omp.private.alloc");
+
+      llvm::Expected<llvm::Value *> privateVarOrError =
+          initPrivateVar(builder, moduleTranslation, privDecl, mlirPrivVar,
+                         blockArg, llvmPrivateVar, privInitBlock);
+      if (!privateVarOrError)
+        return privateVarOrError.takeError();
+      moduleTranslation.mapValue(blockArg, privateVarOrError.get());
+      privateVarsInfo.llvmVars[i] = privateVarOrError.get();
+      // Add the private variable to llvmFirstPrivateVars
+      llvmFirstPrivateVars[index++] = privateVarOrError.get();
+    }
+
+    taskStructMgr.createGEPsToPrivateVars();
+    for (auto [i, llvmPrivVar] :
+         llvm::enumerate(taskStructMgr.getLLVMPrivateVarGEPs())) {
+      if (!llvmPrivVar) {
...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants