-
Notifications
You must be signed in to change notification settings - Fork 14.4k
[Clang] Fix clang crash for fopenmp statement(parallel for) inside lambda function #146772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
…mbda function Previously, implicit variables like `__begin3` used in range-based OpenMP for-loops were not being properly privatized, leading to missing entries in LocalDeclMap and crashes. This patch ensures such implicit loop counters are handled in `EmitOMPPrivateLoopCounters` by allocating memory and registering them in the local declaration map before loop codegen. This fixes #146335
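@llvm/pr-subscribers-clang-codegen @llvm/pr-subscribers-clang Author: Shivam Gupta (xgupta) Changes: Previously, implicit variables like `__begin3` used in range-based OpenMP for-loops were not being properly privatized, leading to missing entries in LocalDeclMap and crashes. This patch ensures such implicit loop counters are handled in `EmitOMPPrivateLoopCounters` by allocating memory and registering them in the local declaration map before loop codegen. This fixes #146335. Patch is 27.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146772.diff 2 Files Affected: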
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 5822e0f6db89a..d32f976afa23d 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -2332,6 +2332,12 @@ void CodeGenFunction::EmitOMPPrivateLoopCounters(
for (const Expr *E : S.counters()) {
const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
const auto *PrivateVD = cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl());
+ // Privatize original counter variable (e.g., __beginN, __endN, __rangeN)
+ if (!LocalDeclMap.count(VD)) {
+ Address Addr = CreateMemTemp(VD->getType(), VD->getName());
+ LocalDeclMap.insert({VD, Addr});
+ (void)LoopScope.addPrivate(VD, Addr);
+ }
// Emit var without initialization.
AutoVarEmission VarEmission = EmitAutoVarAlloca(*PrivateVD);
EmitAutoVarCleanups(VarEmission);
diff --git a/clang/test/OpenMP/for_range_loop_codegen.cpp b/clang/test/OpenMP/for_range_loop_codegen.cpp
new file mode 100644
index 0000000000000..bc86c7a477210
--- /dev/null
+++ b/clang/test/OpenMP/for_range_loop_codegen.cpp
@@ -0,0 +1,419 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --clang-args ['-fopenmp', '-std=c++20'] --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// RUN: %clang_cc1 -verify -fopenmp -std=c++20 -emit-llvm %s -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+enum a {};
+class b {
+public:
+ b(int);
+};
+struct c {
+ typedef b h;
+};
+using e = b;
+using f = b;
+template <typename> class g {};
+template <typename l> class i {
+public:
+ using j = long;
+ l::h aa;
+ i(g<l>) : aa{0} {}
+ bool operator!=(i);
+ void operator++();
+ auto operator*() {
+ int k;
+ return k;
+ }
+ j operator-(i);
+ void operator+=(j);
+};
+class o : public g<c> {
+public:
+ using p = int;
+ o(p, e, f);
+};
+// CHECK1-LABEL: @_Z5begin1gI1cE(
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[RETVAL:%.*]] = alloca [[CLASS_I:%.*]], align 1
+// CHECK1-NEXT: [[AC:%.*]] = alloca [[CLASS_G:%.*]], align 1
+// CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_G]], align 1
+// CHECK1-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[AC]], i32 0, i32 0
+// CHECK1-NEXT: store i8 [[AC_COERCE:%.*]], ptr [[COERCE_DIVE]], align 1
+// CHECK1-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[AGG_TMP]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE1]], align 1
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1iI1cEC1E1gIS0_E(ptr noundef nonnull align 1 dereferenceable(1) [[RETVAL]], i8 [[TMP0]])
+// CHECK1-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE2]], align 1
+// CHECK1-NEXT: ret i8 [[TMP1]]
+auto begin(g<c> ac) {
+ i ad(ac);
+ return ad;
+}
+template <typename l> auto end(l ac) {
+ i ad(ac);
+ return ad;
+}
+using m = b;
+using n = b;
+class D {
+protected:
+ D(int, int, n, m, a);
+};
+class H : D {
+ int af;
+ o t;
+
+public:
+ a v;
+ int q;
+// CHECK1-LABEL: @_ZN1HC1E1bS0_(
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[AG:%.*]] = alloca [[CLASS_B:%.*]], align 1
+// CHECK1-NEXT: [[R:%.*]] = alloca [[CLASS_B]], align 1
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AG]], i32 0, i32 0
+// CHECK1-NEXT: store i8 [[AG_COERCE:%.*]], ptr [[COERCE_DIVE]], align 1
+// CHECK1-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[R]], i32 0, i32 0
+// CHECK1-NEXT: store i8 [[R_COERCE:%.*]], ptr [[COERCE_DIVE1]], align 1
+// CHECK1-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[THIS2:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[COERCE_DIVE3:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AG]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE3]], align 1
+// CHECK1-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[R]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE4]], align 1
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1HC2E1bS0_(ptr noundef nonnull align 4 dereferenceable(16) [[THIS2]], i8 [[TMP0]], i8 [[TMP1]])
+// CHECK1-NEXT: ret ptr [[THIS2]]
+ H(n ag, m r) : D(0, 0, ag, r, a{}), af(0), t{o(0, 0, 0)}, q(0), v() {}
+// CHECK1-LABEL: @_ZN1H1sEv(
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4
+// CHECK1-NEXT: ret void
+ auto s() { return t; }
+};
+template <typename> class F { void ak(H &) const; };
+template <typename am> void F<am>::ak(H &an) const {
+ auto ao = an.s();
+ auto ap = [ao](H) {
+#pragma omp for
+ for (auto d : ao)
+ ;
+ };
+ H ar(0, 0);
+ ap(ar);
+}
+// CHECK1-LABEL: @_ZNK1FIdE2akER1H(
+// CHECK1-NEXT: entry:
+// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[AN_ADDR:%.*]] = alloca ptr, align 8
+// CHECK1-NEXT: [[AO:%.*]] = alloca [[CLASS_O:%.*]], align 1
+// CHECK1-NEXT: [[UNDEF_AGG_TMP:%.*]] = alloca [[CLASS_O]], align 1
+// CHECK1-NEXT: [[AP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
+// CHECK1-NEXT: [[AR:%.*]] = alloca [[CLASS_H:%.*]], align 4
+// CHECK1-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B:%.*]], align 1
+// CHECK1-NEXT: [[AGG_TMP2:%.*]] = alloca [[CLASS_B]], align 1
+// CHECK1-NEXT: [[AGG_TMP6:%.*]] = alloca [[CLASS_H]], align 4
+// CHECK1-NEXT: store ptr [[THIS:%.*]], ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: store ptr [[AN:%.*]], ptr [[AN_ADDR]], align 8
+// CHECK1-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AN_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
+// CHECK1-NEXT: call void @_ZN1H1sEv(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0]])
+// CHECK1-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1bC1Ei(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP]], i32 noundef 0)
+// CHECK1-NEXT: [[CALL3:%.*]] = call noundef ptr @_ZN1bC1Ei(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP2]], i32 noundef 0)
+// CHECK1-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AGG_TMP]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK1-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AGG_TMP2]], i32 0, i32 0
+// CHECK1-NEXT: [[TMP2:%.*]] = load i8, ptr [[COERCE_DIVE4]], align 1
+// CHECK1-NEXT: [[CALL5:%.*]] = call noundef ptr @_ZN1HC1E1bS0_(ptr noundef nonnull align 4 dereferenceable(16) [[AR]], i8 [[TMP1]], i8 [[TMP2]])
+// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP6]], ptr align 4 [[AR]], i64 16, i1 false)
+// CHECK1-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[AGG_TMP6]], align 4
+// CHECK1-NEXT: call void @_ZZNK1FIdE2akER1HENKUlS1_E_clES1_(ptr noundef nonnull align 1 dereferenceable(1) [[AP]], [2 x i64] [[TMP3]])
+// CHECK1-NEXT: ret void
+template class F<double>;
+#endif
+// CHECK-LABEL: define {{[^@]+}}@_Z5begin1gI1cE
+// CHECK-SAME: (i8 [[AC_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[CLASS_I:%.*]], align 1
+// CHECK-NEXT: [[AC:%.*]] = alloca [[CLASS_G:%.*]], align 1
+// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_G]], align 1
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[AC]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[AC_COERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[AGG_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE1]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1iI1cEC1E1gIS0_E(ptr noundef nonnull align 1 dereferenceable(1) [[RETVAL]], i8 [[TMP0]])
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[RETVAL]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE2]], align 1
+// CHECK-NEXT: ret i8 [[TMP1]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1iI1cEC1E1gIS0_E
+// CHECK-SAME: (ptr noundef nonnull returned align 1 dereferenceable(1) [[THIS:%.*]], i8 [[DOTCOERCE:%.*]]) unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CLASS_G:%.*]], align 1
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[DOTCOERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE2]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1iI1cEC2E1gIS0_E(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]], i8 [[TMP1]])
+// CHECK-NEXT: ret ptr [[THIS1]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1iI1cEC2E1gIS0_E
+// CHECK-SAME: (ptr noundef nonnull returned align 1 dereferenceable(1) [[THIS:%.*]], i8 [[DOTCOERCE:%.*]]) unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CLASS_G:%.*]], align 1
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[TMP0]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[DOTCOERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1bC1Ei(ptr noundef nonnull align 1 dereferenceable(1) [[THIS1]], i32 noundef 0)
+// CHECK-NEXT: ret ptr [[THIS1]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZNK1FIdE2akER1H
+// CHECK-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]], ptr noundef nonnull align 4 dereferenceable(16) [[AN:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[AN_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[AO:%.*]] = alloca [[CLASS_O:%.*]], align 1
+// CHECK-NEXT: [[UNDEF_AGG_TMP:%.*]] = alloca [[CLASS_O]], align 1
+// CHECK-NEXT: [[AP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
+// CHECK-NEXT: [[AR:%.*]] = alloca [[CLASS_H:%.*]], align 4
+// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_B:%.*]], align 1
+// CHECK-NEXT: [[AGG_TMP2:%.*]] = alloca [[CLASS_B]], align 1
+// CHECK-NEXT: [[AGG_TMP6:%.*]] = alloca [[CLASS_H]], align 4
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: store ptr [[AN]], ptr [[AN_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[AN_ADDR]], align 8, !nonnull [[META3:![0-9]+]], !align [[META4:![0-9]+]]
+// CHECK-NEXT: call void @_ZN1H1sEv(ptr noundef nonnull align 4 dereferenceable(16) [[TMP0]])
+// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1bC1Ei(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP]], i32 noundef 0)
+// CHECK-NEXT: [[CALL3:%.*]] = call noundef ptr @_ZN1bC1Ei(ptr noundef nonnull align 1 dereferenceable(1) [[AGG_TMP2]], i32 noundef 0)
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AGG_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AGG_TMP2]], i32 0, i32 0
+// CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr [[COERCE_DIVE4]], align 1
+// CHECK-NEXT: [[CALL5:%.*]] = call noundef ptr @_ZN1HC1E1bS0_(ptr noundef nonnull align 4 dereferenceable(16) [[AR]], i8 [[TMP1]], i8 [[TMP2]])
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP6]], ptr align 4 [[AR]], i64 16, i1 false)
+// CHECK-NEXT: [[TMP3:%.*]] = load [2 x i64], ptr [[AGG_TMP6]], align 4
+// CHECK-NEXT: call void @_ZZNK1FIdE2akER1HENKUlS1_E_clES1_(ptr noundef nonnull align 1 dereferenceable(1) [[AP]], [2 x i64] [[TMP3]])
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1H1sEv
+// CHECK-SAME: (ptr noundef nonnull align 4 dereferenceable(16) [[THIS:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[THIS1]], i64 4
+// CHECK-NEXT: ret void
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZN1HC1E1bS0_
+// CHECK-SAME: (ptr noundef nonnull returned align 4 dereferenceable(16) [[THIS:%.*]], i8 [[AG_COERCE:%.*]], i8 [[R_COERCE:%.*]]) unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[AG:%.*]] = alloca [[CLASS_B:%.*]], align 1
+// CHECK-NEXT: [[R:%.*]] = alloca [[CLASS_B]], align 1
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AG]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[AG_COERCE]], ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: [[COERCE_DIVE1:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[R]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[R_COERCE]], ptr [[COERCE_DIVE1]], align 1
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS2:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[COERCE_DIVE3:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[AG]], i32 0, i32 0
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[COERCE_DIVE3]], align 1
+// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[CLASS_B]], ptr [[R]], i32 0, i32 0
+// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[COERCE_DIVE4]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call noundef ptr @_ZN1HC2E1bS0_(ptr noundef nonnull align 4 dereferenceable(16) [[THIS2]], i8 [[TMP0]], i8 [[TMP1]])
+// CHECK-NEXT: ret ptr [[THIS2]]
+//
+//
+// CHECK-LABEL: define {{[^@]+}}@_ZZNK1FIdE2akER1HENKUlS1_E_clES1_
+// CHECK-SAME: (ptr noundef nonnull align 1 dereferenceable(1) [[THIS:%.*]], [2 x i64] [[DOTCOERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = alloca [[CLASS_H:%.*]], align 4
+// CHECK-NEXT: [[THIS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[TMP:%.*]] = alloca [[CLASS_I:%.*]], align 1
+// CHECK-NEXT: [[__RANGE3:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: [[__END3:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[AGG_TMP:%.*]] = alloca [[CLASS_O:%.*]], align 1
+// CHECK-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[AGG_TMP3:%.*]] = alloca [[CLASS_G:%.*]], align 1
+// CHECK-NEXT: [[DOTCAPTURE_EXPR_7:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[DOTCAPTURE_EXPR_8:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[AGG_TMP9:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[__BEGIN3:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[__BEGIN313:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[AGG_TMP14:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8
+// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[__BEGIN318:%.*]] = alloca [[CLASS_I]], align 1
+// CHECK-NEXT: [[D:%.*]] = alloca i32, align 4
+// CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]])
+// CHECK-NEXT: store [2 x i64] [[DOTCOERCE]], ptr [[TMP0]], align 4
+// CHECK-NEXT: store ptr [[THIS]], ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: [[THIS1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8
+// CHECK-NEXT: store ptr [[THIS1]], ptr [[__RANGE3]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[__RANGE3]], align 8, !nonnull [[META3]]
+// CHECK-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds nuw [[CLASS_O]], ptr [[AGG_TMP]], i32 0, i32 0
+// CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[COERCE_DIVE]], align 1
+// CHECK-NEXT: [[CALL:%.*]] = call i8 @_Z3endI1oEDaT_(i8 [[TMP3]])
+// CHECK-NEXT: [[COERCE_DIVE2:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[__END3]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[CALL]], ptr [[COERCE_DIVE2]], align 1
+// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[__RANGE3]], align 8, !nonnull [[META3]]
+// CHECK-NEXT: [[COERCE_DIVE4:%.*]] = getelementptr inbounds nuw [[CLASS_G]], ptr [[AGG_TMP3]], i32 0, i32 0
+// CHECK-NEXT: [[TMP5:%.*]] = load i8, ptr [[COERCE_DIVE4]], align 1
+// CHECK-NEXT: [[CALL5:%.*]] = call i8 @_Z5begin1gI1cE(i8 [[TMP5]])
+// CHECK-NEXT: [[COERCE_DIVE6:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[DOTCAPTURE_EXPR_]], i32 0, i32 0
+// CHECK-NEXT: store i8 [[CALL5]], ptr [[COERCE_DIVE6]], align 1
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[DOTCAPTURE_EXPR_7]], ptr align 1 [[__END3]], i64 1, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AGG_TMP9]], ptr align 1 [[DOTCAPTURE_EXPR_]], i64 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE10:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[AGG_TMP9]], i32 0, i32 0
+// CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[COERCE_DIVE10]], align 1
+// CHECK-NEXT: [[COERCE_VAL_II:%.*]] = zext i8 [[TMP6]] to i64
+// CHECK-NEXT: [[CALL11:%.*]] = call noundef i64 @_ZN1iI1cEmiES1_(ptr noundef nonnull align 1 dereferenceable(1) [[DOTCAPTURE_EXPR_7]], i64 [[COERCE_VAL_II]])
+// CHECK-NEXT: [[SUB:%.*]] = sub nsw i64 [[CALL11]], 1
+// CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[SUB]], 1
+// CHECK-NEXT: [[DIV:%.*]] = sdiv i64 [[ADD]], 1
+// CHECK-NEXT: [[SUB12:%.*]] = sub nsw i64 [[DIV]], 1
+// CHECK-NEXT: store i64 [[SUB12]], ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[__BEGIN3]], ptr align 1 [[DOTCAPTURE_EXPR_]], i64 1, i1 false)
+// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 [[AGG_TMP14]], ptr align 1 [[__END3]], i64 1, i1 false)
+// CHECK-NEXT: [[COERCE_DIVE15:%.*]] = getelementptr inbounds nuw [[CLASS_I]], ptr [[AGG_TMP14]], i32 0, i32 0
+// CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[COERCE_DIVE15]], align 1
+// CHECK-NEXT: [[COERCE_VAL_II16:%.*]] = zext i8 [[TMP7]] to i64
+// CHECK-NEXT: [[CALL17:%.*]] = call noundef i1 @_ZN1iI1cEneES1_(ptr noundef nonnull align 1 dereferenceable(1) [[__BEGIN3]], i64 [[COERCE_VAL_II16]])
+// CHECK-NEXT: br i1 [[CALL17]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+// CHECK: omp.precond.then:
+// CHECK-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8
+// CHECK-NEXT: store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
+// CHECK-NEXT: call void @__kmpc_for_static_init_8(ptr @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 34, ptr [[DOTOMP_IS_LAST]], ptr [[DOTOMP_LB]], ptr [[DOTOMP_UB]], ptr [[DOTOMP_STRIDE]], i64 1, i64 1)
+// CHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_8]], align 8
+// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[TMP9]], [[TMP10]]
+// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+// CHECK: ...
[truncated]
|
// Privatize original counter variable (e.g., __beginN, __endN, __rangeN) | ||
if (!LocalDeclMap.count(VD)) { | ||
Address Addr = CreateMemTemp(VD->getType(), VD->getName()); | ||
LocalDeclMap.insert({VD, Addr}); | ||
(void)LoopScope.addPrivate(VD, Addr); | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I assume this is too broad. Why are the original variables not captured and privatized? It looks like we're missing some analysis.
template <typename am> void F<am>::ak(H &an) const { | ||
auto ao = an.s(); | ||
auto ap = [ao](H) { | ||
#pragma omp for |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
- Hmm, the subject mentions `parallel for`, but here it is only `for`. Why is that?
- Need to simplify the test
Previously, implicit variables like `__begin3` used in range-based OpenMP for-loops were not being properly privatized, leading to missing entries in LocalDeclMap and crashes. This patch ensures such implicit loop counters are handled in `EmitOMPPrivateLoopCounters` by allocating memory and registering them in the local declaration map before loop codegen. This fixes #146335