[Coroutines] [CoroElide] Don't think exceptional terminator don't leak coro handle unconditionally any more

ChuanqiXu9 · razmser · commit fd97553a4811 · 2023-10-03T16:05:15.000+03:00
Close llvm#59723. The fundamental cause of the above issue is that we assumed the memory of coroutine frame can be released by stack unwinding automatically if the allocation of the coroutine frame is elided. But we missed one point: the stack unwinding has different semantics with the explicit coroutine_handle<>::destroy(). Since the latter is explicit so it shows the intention of the user. So we can blame the user to destroy the coroutine frame incorrectly in case of use-after-free happens. But we can't do so with stack unwinding. So after this patch, we won't think the exceptional terminator don't leak the coroutine handle unconditionally. Instead, we think the exceptional terminator will leak the coroutine handle too if the coroutine is leaked somewhere along the search path. Concretely for C++, we can think the exceptional terminator is not special any more. Maybe this may cause some performance regressions. But I've tested the motivating example (std::generator). And on the other side, the coroutine elision is a middle end opitmization and not a language feature. So we don't think we should blame such regressions especially we are correcting the miscompilations.
diff --git a/clang/test/CodeGenCoroutines/coro-halo.cpp b/clang/test/CodeGenCoroutines/coro-halo.cpp
@@ -1,5 +1,7 @@
 // This tests that the coroutine heap allocation elision optimization could happen succesfully.
 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -O2 -emit-llvm %s \
+// RUN:   -fcxx-exceptions -fexceptions -o - | FileCheck %s
 
 #include "Inputs/coroutine.h"
 #include "Inputs/numeric.h"
diff --git a/clang/test/CodeGenCoroutines/pr59723.cpp b/clang/test/CodeGenCoroutines/pr59723.cpp
@@ -0,0 +1,237 @@
+// This is reduced test case from https://github.com/llvm/llvm-project/issues/59723.
+// This is not a minimal reproducer intentionally to check the compiler's ability.
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++20 -fcxx-exceptions\
+// RUN:     -fexceptions -O2 -emit-llvm %s -o - | FileCheck %s
+
+#include "Inputs/coroutine.h"
+
+// executor and operation base
+
+class bug_any_executor;
+
+struct bug_async_op_base
+{
+	void invoke();
+
+protected:
+
+	~bug_async_op_base() = default;
+};
+
+class bug_any_executor
+{
+	using op_type = bug_async_op_base;
+
+public:
+
+	virtual ~bug_any_executor() = default;
+
+	// removing noexcept enables clang to find that the pointer has escaped
+	virtual void post(op_type& op) noexcept = 0;
+
+	virtual void wait() noexcept = 0;
+};
+
+class bug_thread_executor : public bug_any_executor
+{
+
+public:
+
+	void start()
+	{
+		
+	}
+
+	~bug_thread_executor()
+	{
+	}
+
+	// although this implementation is not realy noexcept due to allocation but I have a real one that is and required to be noexcept
+	virtual void post(bug_async_op_base& op) noexcept override;
+
+	virtual void wait() noexcept override
+	{
+		
+	}
+};
+
+// task and promise
+
+struct bug_final_suspend_notification
+{
+	virtual std::coroutine_handle<> get_waiter() = 0;
+};
+
+class bug_task;
+
+class bug_task_promise
+{
+	friend bug_task;
+public:
+
+	bug_task get_return_object() noexcept;
+
+	constexpr std::suspend_always initial_suspend() noexcept { return {}; }
+
+	std::suspend_always final_suspend() noexcept 
+	{
+		return {};
+	}
+
+	void unhandled_exception() noexcept;
+
+	constexpr void return_void() const noexcept {}
+
+	void get_result() const
+	{
+		
+	}
+};
+
+template <class T, class U>
+T exchange(T &&t, U &&u) {
+    T ret = t;
+    t = u;
+    return ret;
+}
+
+class bug_task
+{
+	friend bug_task_promise;
+	using handle = std::coroutine_handle<>;
+	using promise_t = bug_task_promise;
+
+	bug_task(handle coro, promise_t* p) noexcept : this_coro{ coro }, this_promise{ p }
+	{
+	
+	}
+
+public:
+	using promise_type = bug_task_promise;
+
+    bug_task(bug_task&& other) noexcept
+		: this_coro{ exchange(other.this_coro, nullptr) }, this_promise{ exchange(other.this_promise, nullptr) } { 
+		
+	}
+
+	~bug_task()
+	{
+		if (this_coro)
+			this_coro.destroy();
+	}
+
+	constexpr bool await_ready() const noexcept
+	{
+		return false;
+	}
+
+	handle await_suspend(handle waiter) noexcept
+	{
+		return this_coro;
+	}
+
+	void await_resume() 
+	{
+		return this_promise->get_result();
+	}
+
+	handle this_coro;
+	promise_t* this_promise;
+};
+
+bug_task bug_task_promise::get_return_object() noexcept
+{
+	return { std::coroutine_handle<bug_task_promise>::from_promise(*this), this };
+}
+
+// spawn operation and spawner
+
+template<class Handler>
+class bug_spawn_op final : public bug_async_op_base, bug_final_suspend_notification
+{
+	Handler handler;
+	bug_task task_;
+
+public:
+
+	bug_spawn_op(Handler handler, bug_task&& t)
+		: handler { handler }, task_{ static_cast<bug_task&&>(t) } {}
+
+	virtual std::coroutine_handle<> get_waiter() override
+	{
+		handler();
+		return std::noop_coroutine();
+	}
+};
+
+class bug_spawner;
+
+struct bug_spawner_awaiter
+{
+	bug_spawner& s;
+	std::coroutine_handle<> waiter;
+
+	bug_spawner_awaiter(bug_spawner& s) : s{ s } {}
+
+	bool await_ready() const noexcept;
+
+	void await_suspend(std::coroutine_handle<> coro);
+
+	void await_resume() {}
+};
+
+class bug_spawner
+{
+	friend bug_spawner_awaiter;
+
+	struct final_handler_t
+	{
+		bug_spawner& s;
+
+		void operator()()
+		{
+			s.awaiter_->waiter.resume();
+		}
+	};
+
+public:
+
+	bug_spawner(bug_any_executor& ex) : ex_{ ex } {}
+
+	void spawn(bug_task&& t) {
+		using op_t = bug_spawn_op<final_handler_t>;
+		// move task into ptr
+		op_t* ptr = new op_t(final_handler_t{ *this }, static_cast<bug_task&&>(t));
+		++count_;
+		ex_.post(*ptr); // ptr escapes here thus task escapes but clang can't deduce that unless post() is not noexcept
+	}
+
+	bug_spawner_awaiter wait() noexcept { return { *this }; }
+
+private:
+	bug_any_executor& ex_; // if bug_thread_executor& is used instead enables clang to detect the escape of the promise
+	bug_spawner_awaiter* awaiter_ = nullptr;
+	unsigned count_ = 0;
+};
+
+// test case
+
+bug_task bug_spawned_task(int id, int inc)
+{
+	co_return;
+}
+
+struct A {
+    A();
+};
+
+void throwing_fn(bug_spawner& s) {
+	s.spawn(bug_spawned_task(1, 2));
+    throw A{};
+}
+
+// Check that the coroutine frame of bug_spawned_task are allocated from operator new.
+// CHECK: define{{.*}}@_Z11throwing_fnR11bug_spawner
+// CHECK-NOT: alloc
+// CHECK: %[[CALL:.+]] = {{.*}}@_Znwm(i64{{.*}} 24)
+// CHECK: store ptr @_Z16bug_spawned_taskii.resume, ptr %[[CALL]]
diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -194,12 +194,49 @@ bool Lowerer::hasEscapePath(const CoroBeginInst *CB,
   for (auto *DA : It->second)
     Visited.insert(DA->getParent());
 
+  SmallPtrSet<const BasicBlock *, 32> EscapingBBs;
+  for (auto *U : CB->users()) {
+    // The use from coroutine intrinsics are not a problem.
+    if (isa<CoroFreeInst, CoroSubFnInst, CoroSaveInst>(U))
+      continue;
+
+    // Think all other usages may be an escaping candidate conservatively.
+    //
+    // Note that the major user of switch ABI coroutine (the C++) will store
+    // resume.fn, destroy.fn and the index to the coroutine frame immediately.
+    // So the parent of the coro.begin in C++ will be always escaping.
+    // Then we can't get any performance benefits for C++ by improving the
+    // precision of the method.
+    //
+    // The reason why we still judge it is we want to make LLVM Coroutine in
+    // switch ABIs to be self contained as much as possible instead of a
+    // by-product of C++20 Coroutines.
+    EscapingBBs.insert(cast<Instruction>(U)->getParent());
+  }
+
+  bool PotentiallyEscaped = false;
+
   do {
     const auto *BB = Worklist.pop_back_val();
     if (!Visited.insert(BB).second)
       continue;
-    if (TIs.count(BB))
-      return true;
+
+    // A Path insensitive marker to test whether the coro.begin escapes.
+    // It is intentional to make it path insensitive while it may not be
+    // precise since we don't want the process to be too slow.
+    PotentiallyEscaped |= EscapingBBs.count(BB);
+
+    if (TIs.count(BB)) {
+      if (!BB->getTerminator()->isExceptionalTerminator() || PotentiallyEscaped)
+        return true;
+
+      // If the function ends with the exceptional terminator, the memory used
+      // by the coroutine frame can be released by stack unwinding
+      // automatically. So we can think the coro.begin doesn't escape if it
+      // exits the function by exceptional terminator.
+
+      continue;
+    }
 
     // Conservatively say that there is potentially a path.
     if (!--Limit)
@@ -236,36 +273,36 @@ bool Lowerer::shouldElide(Function *F, DominatorTree &DT) const {
   // memory location storing that value and not the virtual register.
 
   SmallPtrSet<BasicBlock *, 8> Terminators;
-  // First gather all of the non-exceptional terminators for the function.
+  // First gather all of the terminators for the function.
   // Consider the final coro.suspend as the real terminator when the current
   // function is a coroutine.
-    for (BasicBlock &B : *F) {
-      auto *TI = B.getTerminator();
-      if (TI->getNumSuccessors() == 0 && !TI->isExceptionalTerminator() &&
-          !isa<UnreachableInst>(TI))
-        Terminators.insert(&B);
-    }
+  for (BasicBlock &B : *F) {
+    auto *TI = B.getTerminator();
+
+    if (TI->getNumSuccessors() != 0 || isa<UnreachableInst>(TI))
+      continue;
+
+    Terminators.insert(&B);
+  }
 
   // Filter out the coro.destroy that lie along exceptional paths.
   SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
   for (const auto &It : DestroyAddr) {
-    // If there is any coro.destroy dominates all of the terminators for the
-    // coro.begin, we could know the corresponding coro.begin wouldn't escape.
-    for (Instruction *DA : It.second) {
-      if (llvm::all_of(Terminators, [&](auto *TI) {
-            return DT.dominates(DA, TI->getTerminator());
-          })) {
-        ReferencedCoroBegins.insert(It.first);
-        break;
-      }
-    }
-
-    // Whether there is any paths from coro.begin to Terminators which not pass
-    // through any of the coro.destroys.
+    // If every terminators is dominated by coro.destroy, we could know the
+    // corresponding coro.begin wouldn't escape.
+    //
+    // Otherwise hasEscapePath would decide whether there is any paths from
+    // coro.begin to Terminators which not pass through any of the
+    // coro.destroys.
     //
     // hasEscapePath is relatively slow, so we avoid to run it as much as
     // possible.
-    if (!ReferencedCoroBegins.count(It.first) &&
+    if (llvm::all_of(Terminators,
+                     [&](auto *TI) {
+                       return llvm::any_of(It.second, [&](auto *DA) {
+                         return DT.dominates(DA, TI->getTerminator());
+                       });
+                     }) ||
         !hasEscapePath(It.first, Terminators))
       ReferencedCoroBegins.insert(It.first);
   }