
Clang mistakenly elides coroutine allocation resulting in a segfault and stack-use-after-return from AddressSanitizer #59723

Closed
cppdev123 opened this issue Dec 27, 2022 · 12 comments · Fixed by llvm/llvm-project-release-prs#637

Comments


cppdev123 commented Dec 27, 2022

The problem was submitted in #56513 and #56455 but there were no responses.
I thought it was specific to Windows, but it also happens on Linux with Clang 15.0.6 at optimization level -O3: AddressSanitizer detects a stack access after the coroutine returns.
The code in #56513 had a race condition in final_suspend, but that was not the cause of the problem.

Compile the following code with:

clang++-15 clangcorobug.cpp -std=c++20 -O3 -g -fsanitize=address -lpthread -o corobug
#include <atomic>
#include <thread>
#include <condition_variable>
#include <coroutine>
#include <variant>
#include <deque>
#include <cassert>

// executor and operation base

class bug_any_executor;

struct bug_async_op_base {
	void invoke() {
		invoke_operation();
	}

protected:

	~bug_async_op_base() = default;

	virtual void invoke_operation() = 0;
};

class bug_any_executor {
	using op_type = bug_async_op_base;

public:

	virtual ~bug_any_executor() = default;

	// removing noexcept enables clang to find that the pointer has escaped
	virtual void post(op_type& op) noexcept = 0;

	virtual void wait() noexcept = 0;
};

class bug_thread_executor : public bug_any_executor {
	void work_thd() {
		while (!ops_.empty()) {
			std::unique_lock<std::mutex> lock{ lock_ };
			cv_.wait(lock, [this] { return !ops_.empty(); });

			while (!ops_.empty()) {
				bug_async_op_base* op = ops_.front();
				ops_.pop_front();
				op->invoke();
			}
		}

		cv_.notify_all();
	}

	std::mutex lock_;
	std::condition_variable cv_;
	std::deque<bug_async_op_base*> ops_;
	std::thread thd_;

public:

	void start() {
		thd_ = std::thread(&bug_thread_executor::work_thd, this);
	}

	~bug_thread_executor() {
		if (thd_.joinable())
			thd_.join();
	}

	// although this implementation is not really noexcept (it allocates), the real one it models is noexcept and is required to be
	virtual void post(bug_async_op_base& op) noexcept override {
		{
			std::unique_lock<std::mutex> lock{ lock_ };
			ops_.push_back(&op);
		}
		cv_.notify_all();
	}

	virtual void wait() noexcept override {
		std::unique_lock<std::mutex> lock{ lock_ };
		cv_.wait(lock, [this] { return ops_.empty(); });
	}
};

// task and promise

struct bug_final_suspend_notification {
	virtual std::coroutine_handle<> get_waiter() = 0;
};

class bug_task;

class bug_resume_waiter {
public:
	bug_resume_waiter(std::variant<std::coroutine_handle<>, bug_final_suspend_notification*> waiter) noexcept : waiter_{ waiter } {}

	constexpr bool await_ready() const noexcept { return false; }

	std::coroutine_handle<> await_suspend(std::coroutine_handle<>) noexcept {
		return waiter_.index() == 0 ? std::get<0>(waiter_) : std::get<1>(waiter_)->get_waiter();
	}

	constexpr void await_resume() const noexcept {}

private:
	std::variant<std::coroutine_handle<>, bug_final_suspend_notification*> waiter_;
};

class bug_task_promise {
	friend bug_task;
public:

	bug_task get_return_object() noexcept;

	constexpr std::suspend_always initial_suspend() noexcept { return {}; }

	bug_resume_waiter final_suspend() noexcept {
		return bug_resume_waiter{ waiter_ };
	}

	void unhandled_exception() noexcept {
		ex_ptr = std::current_exception();
	}

	constexpr void return_void() const noexcept {}

	void get_result() const {
		if (ex_ptr)
			std::rethrow_exception(ex_ptr);
	}

	std::variant<std::monostate, std::exception_ptr> result_or_error() const noexcept {
		if (ex_ptr)
			return ex_ptr;
		return {};
	}

private:
	std::variant<std::coroutine_handle<>, bug_final_suspend_notification*> waiter_;
	std::exception_ptr ex_ptr = nullptr;
};

class bug_task {
	friend bug_task_promise;
	using handle = std::coroutine_handle<>;
	using promise_t = bug_task_promise;

	bug_task(handle coro, promise_t* p) noexcept : this_coro{ coro }, this_promise{ p } {
		//printf("task(%p) coroutine(%p) promise(%p)\n", this, this_coro.address(), this_promise);
	}

public:

	using promise_type = bug_task_promise;

	bug_task(bug_task&& other) noexcept
		: this_coro{ std::exchange(other.this_coro, nullptr) }, this_promise{ std::exchange(other.this_promise, nullptr) } { 
		printf("task(task&&: %p) coroutine(%p) promise(%p)\n", this, this_coro.address(), this_promise); 
	}

	~bug_task() {
		if (this_coro) {
			//printf("~task(%p) coroutine(%p) promise(%p)\n", this, this_coro.address(), this_promise);
			this_coro.destroy();
		}
	}

	constexpr bool await_ready() const noexcept {
		return false;
	}

	handle await_suspend(handle waiter) noexcept {
		assert(this_coro != nullptr && this_promise != nullptr);
		this_promise->waiter_ = waiter;
		return this_coro;
	}

	void await_resume() {
		return this_promise->get_result();
	}

	bool is_valid() const noexcept {
		return this_promise != nullptr && this_coro != nullptr;
	}

	void start_coro(bug_final_suspend_notification& w) noexcept {
		assert(this_promise != nullptr && this_coro != nullptr);
		this_promise->waiter_ = &w;
		this_coro.resume(); // never throws since all exceptions are caught by the promise
	}

private:
	handle this_coro;
	promise_t* this_promise;
};

bug_task bug_task_promise::get_return_object() noexcept {
	return { std::coroutine_handle<bug_task_promise>::from_promise(*this), this };
}

// spawn operation and spawner

template<class Handler>
class bug_spawn_op final : public bug_async_op_base, bug_final_suspend_notification {
	Handler handler;
	bug_task task_;

public:

	bug_spawn_op(Handler handler, bug_task&& t)
		: handler { handler }, task_{ std::move(t) } {}

	virtual void invoke_operation() override {
		printf("starting the coroutine\n");
		task_.start_coro(*this);
		printf("started the coroutine\n");
	}

	virtual std::coroutine_handle<> get_waiter() override {
                auto handler2 = std::move(handler);
                delete this;
		handler2();
		return std::noop_coroutine();
	}
};

struct dummy_spawn_handler_t {
	constexpr void operator()() const noexcept {}
};

void bug_spawn(bug_any_executor& ex, bug_task&& t) {
	using op_t = bug_spawn_op<dummy_spawn_handler_t>;
	op_t* op = new op_t{ dummy_spawn_handler_t{}, std::move(t) };
	ex.post(*op);
}

class bug_spawner;

struct bug_spawner_awaiter {
	bug_spawner& s;
	std::coroutine_handle<> waiter;

	bug_spawner_awaiter(bug_spawner& s) : s{ s } {}

	bool await_ready() const noexcept;

	void await_suspend(std::coroutine_handle<> coro);

	void await_resume() {}
};

class bug_spawner {
	friend bug_spawner_awaiter;

	struct final_handler_t {
		bug_spawner& s;

		void operator()() {
			s.on_spawn_finished();
		}
	};

public:

	bug_spawner(bug_any_executor& ex) : ex_{ ex } {}

	void spawn(bug_task&& t) {
		using op_t = bug_spawn_op<final_handler_t>;
		// move task into ptr
		op_t* ptr = new op_t(final_handler_t{ *this }, std::move(t));
		++count_;
		ex_.post(*ptr); // ptr escapes here and thus the task escapes, but clang can't deduce that unless post() is not noexcept
	}

	bug_spawner_awaiter wait() noexcept { return { *this }; }

	void on_spawn_finished()
	{
		if (!--count_ && awaiter_)
		{
			auto a = std::exchange(awaiter_, nullptr);
			a->waiter.resume();
		}
	}

private:

	bug_any_executor& ex_; // using bug_thread_executor& here instead enables clang to detect the escape of the promise
	bug_spawner_awaiter* awaiter_ = nullptr;
	std::atomic<std::size_t> count_ = 0;
};

bool bug_spawner_awaiter::await_ready() const noexcept {
	return s.count_ == 0;
}

void bug_spawner_awaiter::await_suspend(std::coroutine_handle<> coro) {
	waiter = coro;
	s.awaiter_ = this;
}

template<std::invocable<bug_spawner&> Fn>
bug_task scoped_spawn(bug_any_executor& ex, Fn fn) {
	bug_spawner s{ ex };
	std::exception_ptr ex_ptr;

	try
	{
		fn(s);
	}
	catch (const std::exception& ex) // ex instead of ... to observe the address of ex
	{
		printf("caught an exception from fn(s): %p\n", std::addressof(ex));
		ex_ptr = std::current_exception();
	}

	co_await s.wait();
	if (ex_ptr)
		std::rethrow_exception(ex_ptr);
}

// forked task to start the coroutine from sync code

struct bug_forked_task_promise;

class bug_forked_task {
	friend struct bug_forked_task_promise;
	bug_forked_task() = default;
public:
	using promise_type = bug_forked_task_promise;
};

struct bug_forked_task_promise {
	bug_forked_task get_return_object() noexcept { return {}; }

	constexpr std::suspend_never initial_suspend() noexcept { return {}; }

	constexpr std::suspend_never final_suspend() noexcept { return {}; }

	void unhandled_exception() noexcept {
		std::terminate();
	}

	constexpr void return_void() const noexcept {}
};

// test case

bug_task bug_spawned_task(int id, int inc, std::atomic<int>& n) {
	int result = n += inc;
	std::string msg = "count in coro (" + std::to_string(id) + ") = " + std::to_string(result);
	printf("%s\n", msg.c_str());
	co_return;
}

// using bug_thread_executor& instead of bug_any_executor& resolves the problem
bug_forked_task run_coros(bug_any_executor& ex) {
	std::atomic<int> count = 0;
	auto throwing_fn = [&](bug_spawner& s) {
		int frame_ptr = 0;
		printf("frame ptr ptr: %p\n", std::addressof(frame_ptr));
		s.spawn(bug_spawned_task(1, 2, count)); // the coroutine frame is allocated on the stack !
		s.spawn(bug_spawned_task(2, 3, count));
		s.spawn(bug_spawned_task(3, 5, count));
                // commenting the following line hides the problem
		throw std::runtime_error{ "catch this !" }; // on windows allocated on the stack as required by msvc c++ abi
	};

	try {
		co_await scoped_spawn(ex, throwing_fn);
	}
	catch (const std::exception& ex) {
		printf("scoped_spawn propagated exception: %s\n", ex.what());
	}

	printf("count after scoped_spawn: %d\n", count.load());
}


int main() {
	int var = 0;
	bug_thread_executor ex;
	printf("stack address: %p\n", std::addressof(var));
	run_coros(ex);
	ex.start();
	ex.wait();
	return 0;
}

Then run ./corobug and you will get something like this: AddressSanitizer: stack-use-after-return on address 0x7fb1ba9f9338 at pc 0x7fb1bd4d3b58 bp 0x7fb1ba1efd40 sp 0x7fb1ba1efd38
Without AddressSanitizer, a segfault occurs instead.
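
For reference, the optimized IR can also be inspected locally with something like the following (same source and flags as the reproducer above, swapping the ASan and link options for -S -emit-llvm; the exact invocation is only a sketch):

clang++-15 clangcorobug.cpp -std=c++20 -O3 -S -emit-llvm -o corobug.ll

The [72 x i8] allocas for the three coroutine frames then appear directly inside throwing_fn, as in the IR excerpt quoted later in this thread.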

@EugeneZelenko added the coroutines (C++20 coroutines) label and removed the new issue label Dec 27, 2022

llvmbot commented Dec 27, 2022

@llvm/issue-subscribers-coroutines

cppdev123 (Author) commented:

I put it on Compiler Explorer, and it is clear from the LLVM output that the coroutines are allocated on the stack.

std::atomic<int> count = 0;
void throwing_fn(bug_spawner& s) {
	int frame_ptr = 0;
	printf("frame ptr ptr: %p\n", std::addressof(frame_ptr));
	s.spawn(bug_spawned_task(1, 2, count)); // the coroutine frame is allocated on the stack !
	s.spawn(bug_spawned_task(2, 3, count));
	s.spawn(bug_spawned_task(3, 5, count));
	// commenting the following line hides the problem
	throw std::runtime_error{ "catch this !" }; // allocated on the stack as required by msvc c++ abi
};

translates to:

define dso_local void @_Z11throwing_fnR11bug_spawner(ptr noundef nonnull align 8 dereferenceable(24) %0) #13 personality ptr @__gxx_personality_v0 !dbg !5530 {
  %2 = alloca i32, align 4
  %3 = alloca [72 x i8], align 8 ; stack space for first spawned coroutine
  %4 = alloca [72 x i8], align 8  ; stack space for second spawned coroutine
  %5 = alloca [72 x i8], align 8  ; stack space for third spawned coroutine
  call void @llvm.dbg.value(metadata ptr %0, metadata !5534, metadata !DIExpression()), !dbg !5536
  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %2) #27, !dbg !5537
  call void @llvm.dbg.value(metadata i32 0, metadata !5535, metadata !DIExpression()), !dbg !5536
  store i32 0, ptr %2, align 4, !dbg !5538, !tbaa !5539
  call void @llvm.dbg.value(metadata ptr %2, metadata !5535, metadata !DIExpression(DW_OP_deref)), !dbg !5536
  %6 = call i32 (ptr, ...) @printf(ptr noundef nonnull @.str.3, ptr noundef nonnull %2), !dbg !5541
  call void @llvm.dbg.value(metadata i32 1, metadata !4950, metadata !DIExpression()), !dbg !5542
  call void @llvm.dbg.value(metadata i32 2, metadata !4951, metadata !DIExpression()), !dbg !5542
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5542
  call void @llvm.dbg.declare(metadata ptr %3, metadata !4953, metadata !DIExpression(DW_OP_plus_uconst, 16)), !dbg !5542
  call void @llvm.dbg.declare(metadata ptr %3, metadata !4964, metadata !DIExpression()), !dbg !5542
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.resume, ptr %3, align 8, !dbg !5542
  %7 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 1, !dbg !5542
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.cleanup, ptr %7, align 8, !dbg !5542
  %8 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 2, !dbg !5542
  %9 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 3, !dbg !5542
  store ptr @count, ptr %9, align 8, !dbg !5542
  %10 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 5, !dbg !5542
  store <2 x i32> <i32 1, i32 2>, ptr %10, align 8, !dbg !5542
  call void @llvm.dbg.value(metadata i32 1, metadata !4950, metadata !DIExpression()), !dbg !5542
  call void @llvm.dbg.value(metadata i32 2, metadata !4951, metadata !DIExpression()), !dbg !5542
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5542
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %8) #27, !dbg !5544
  call void @llvm.dbg.value(metadata ptr %8, metadata !4982, metadata !DIExpression()), !dbg !5545
  call void @llvm.dbg.value(metadata ptr %8, metadata !4988, metadata !DIExpression()), !dbg !5547
  call void @llvm.dbg.value(metadata ptr %8, metadata !4993, metadata !DIExpression()), !dbg !5549
  call void @llvm.dbg.value(metadata ptr %8, metadata !4999, metadata !DIExpression()), !dbg !5551
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5007, metadata !DIExpression()), !dbg !5553
  call void @llvm.dbg.value(metadata ptr %8, metadata !5011, metadata !DIExpression()), !dbg !5554
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5018, metadata !DIExpression()), !dbg !5554
  call void @llvm.dbg.value(metadata ptr %8, metadata !5022, metadata !DIExpression()), !dbg !5556
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5029, metadata !DIExpression()), !dbg !5556
  call void @llvm.dbg.value(metadata ptr %8, metadata !5033, metadata !DIExpression()), !dbg !5558
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5040, metadata !DIExpression()), !dbg !5558
  call void @llvm.dbg.value(metadata ptr %8, metadata !5044, metadata !DIExpression()), !dbg !5560
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5051, metadata !DIExpression()), !dbg !5560
  call void @llvm.dbg.value(metadata ptr %8, metadata !5055, metadata !DIExpression()), !dbg !5562
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5061, metadata !DIExpression()), !dbg !5564
  call void @llvm.dbg.value(metadata ptr %8, metadata !5066, metadata !DIExpression()), !dbg !5565
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5073, metadata !DIExpression()), !dbg !5567
  call void @llvm.dbg.value(metadata ptr %8, metadata !5078, metadata !DIExpression()), !dbg !5568
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5084, metadata !DIExpression()), !dbg !5570
  call void @llvm.dbg.value(metadata ptr %8, metadata !5089, metadata !DIExpression()), !dbg !5571
  store ptr null, ptr %8, align 8, !dbg !5573, !tbaa !5095
  %11 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, !dbg !5574
  store i8 0, ptr %11, align 8, !dbg !5574, !tbaa !5097
  %12 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 2, i32 1, !dbg !5575
  call void @llvm.dbg.value(metadata ptr %12, metadata !5100, metadata !DIExpression()), !dbg !5576
  call void @llvm.dbg.value(metadata ptr null, metadata !5103, metadata !DIExpression()), !dbg !5576
  store ptr null, ptr %12, align 8, !dbg !5578, !tbaa !5107
  %13 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %3, i64 0, i32 7, !dbg !5544
  store i1 false, ptr %13, align 8, !dbg !5544
  call void @llvm.dbg.value(metadata ptr %0, metadata !5579, metadata !DIExpression()), !dbg !5587
  call void @llvm.dbg.value(metadata ptr undef, metadata !5582, metadata !DIExpression()), !dbg !5587
  %14 = call noalias noundef nonnull dereferenceable(40) ptr @_Znwm(i64 noundef 40) #29, !dbg !5589
  call void @llvm.dbg.value(metadata ptr %0, metadata !5590, metadata !DIExpression()), !dbg !5596
  call void @llvm.dbg.value(metadata ptr %14, metadata !5593, metadata !DIExpression()), !dbg !5596
  call void @llvm.dbg.value(metadata ptr undef, metadata !5595, metadata !DIExpression()), !dbg !5596
  %15 = getelementptr inbounds i8, ptr %14, i64 8, !dbg !5598
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 0, i64 2), ptr %14, align 8, !dbg !5598, !tbaa !4835
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 1, i64 2), ptr %15, align 8, !dbg !5598, !tbaa !4835
  %16 = getelementptr inbounds %class.bug_spawn_op.14, ptr %14, i64 0, i32 2, !dbg !5599
  store ptr %0, ptr %16, align 8, !dbg !5599, !tbaa.struct !4802
  %17 = getelementptr inbounds %class.bug_spawn_op.14, ptr %14, i64 0, i32 3, !dbg !5600
  call void @llvm.dbg.value(metadata ptr %17, metadata !4838, metadata !DIExpression()), !dbg !5601
  call void @llvm.dbg.value(metadata ptr undef, metadata !4841, metadata !DIExpression()), !dbg !5601
  store ptr %3, ptr %17, align 8, !dbg !5603
  %18 = getelementptr inbounds %class.bug_spawn_op.14, ptr %14, i64 0, i32 3, i32 1, !dbg !5604
  store ptr %8, ptr %18, align 8, !dbg !5604, !tbaa !4808
  %19 = call i32 (ptr, ...) @printf(ptr noundef nonnull @.str.15, ptr noundef nonnull %17, ptr noundef nonnull %3, ptr noundef nonnull %8), !dbg !5605
  call void @llvm.dbg.value(metadata ptr %14, metadata !5583, metadata !DIExpression()), !dbg !5587
  %20 = getelementptr inbounds %class.bug_spawner, ptr %0, i64 0, i32 2, !dbg !5606
  call void @llvm.dbg.value(metadata ptr %20, metadata !5607, metadata !DIExpression()), !dbg !5611
  %21 = atomicrmw add ptr %20, i64 1 seq_cst, align 8, !dbg !5613
  %22 = load ptr, ptr %0, align 8, !dbg !5614, !tbaa !5615
  %23 = load ptr, ptr %22, align 8, !dbg !5616, !tbaa !4835
  %24 = getelementptr inbounds ptr, ptr %23, i64 2, !dbg !5616
  %25 = load ptr, ptr %24, align 8, !dbg !5616
  call void %25(ptr noundef nonnull align 8 dereferenceable(8) %22, ptr noundef nonnull align 8 dereferenceable(8) %14), !dbg !5616
  call void @llvm.dbg.value(metadata i32 2, metadata !4950, metadata !DIExpression()), !dbg !5617
  call void @llvm.dbg.value(metadata i32 3, metadata !4951, metadata !DIExpression()), !dbg !5617
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5617
  call void @llvm.dbg.declare(metadata ptr %4, metadata !4953, metadata !DIExpression(DW_OP_plus_uconst, 16)), !dbg !5617
  call void @llvm.dbg.declare(metadata ptr %4, metadata !4964, metadata !DIExpression()), !dbg !5617
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.resume, ptr %4, align 8, !dbg !5617
  %26 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 1, !dbg !5617
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.cleanup, ptr %26, align 8, !dbg !5617
  %27 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 2, !dbg !5617
  %28 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 3, !dbg !5617
  store ptr @count, ptr %28, align 8, !dbg !5617
  %29 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 5, !dbg !5617
  store <2 x i32> <i32 2, i32 3>, ptr %29, align 8, !dbg !5617
  call void @llvm.dbg.value(metadata i32 2, metadata !4950, metadata !DIExpression()), !dbg !5617
  call void @llvm.dbg.value(metadata i32 3, metadata !4951, metadata !DIExpression()), !dbg !5617
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5617
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %27) #27, !dbg !5619
  call void @llvm.dbg.value(metadata ptr %27, metadata !4982, metadata !DIExpression()), !dbg !5620
  call void @llvm.dbg.value(metadata ptr %27, metadata !4988, metadata !DIExpression()), !dbg !5622
  call void @llvm.dbg.value(metadata ptr %27, metadata !4993, metadata !DIExpression()), !dbg !5624
  call void @llvm.dbg.value(metadata ptr %27, metadata !4999, metadata !DIExpression()), !dbg !5626
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5007, metadata !DIExpression()), !dbg !5628
  call void @llvm.dbg.value(metadata ptr %27, metadata !5011, metadata !DIExpression()), !dbg !5629
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5018, metadata !DIExpression()), !dbg !5629
  call void @llvm.dbg.value(metadata ptr %27, metadata !5022, metadata !DIExpression()), !dbg !5631
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5029, metadata !DIExpression()), !dbg !5631
  call void @llvm.dbg.value(metadata ptr %27, metadata !5033, metadata !DIExpression()), !dbg !5633
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5040, metadata !DIExpression()), !dbg !5633
  call void @llvm.dbg.value(metadata ptr %27, metadata !5044, metadata !DIExpression()), !dbg !5635
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5051, metadata !DIExpression()), !dbg !5635
  call void @llvm.dbg.value(metadata ptr %27, metadata !5055, metadata !DIExpression()), !dbg !5637
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5061, metadata !DIExpression()), !dbg !5639
  call void @llvm.dbg.value(metadata ptr %27, metadata !5066, metadata !DIExpression()), !dbg !5640
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5073, metadata !DIExpression()), !dbg !5642
  call void @llvm.dbg.value(metadata ptr %27, metadata !5078, metadata !DIExpression()), !dbg !5643
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5084, metadata !DIExpression()), !dbg !5645
  call void @llvm.dbg.value(metadata ptr %27, metadata !5089, metadata !DIExpression()), !dbg !5646
  store ptr null, ptr %27, align 8, !dbg !5648, !tbaa !5095
  %30 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, !dbg !5649
  store i8 0, ptr %30, align 8, !dbg !5649, !tbaa !5097
  %31 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 2, i32 1, !dbg !5650
  call void @llvm.dbg.value(metadata ptr %31, metadata !5100, metadata !DIExpression()), !dbg !5651
  call void @llvm.dbg.value(metadata ptr null, metadata !5103, metadata !DIExpression()), !dbg !5651
  store ptr null, ptr %31, align 8, !dbg !5653, !tbaa !5107
  %32 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %4, i64 0, i32 7, !dbg !5619
  store i1 false, ptr %32, align 8, !dbg !5619
  call void @llvm.dbg.value(metadata ptr %0, metadata !5579, metadata !DIExpression()), !dbg !5654
  call void @llvm.dbg.value(metadata ptr undef, metadata !5582, metadata !DIExpression()), !dbg !5654
  %33 = call noalias noundef nonnull dereferenceable(40) ptr @_Znwm(i64 noundef 40) #29, !dbg !5656
  call void @llvm.dbg.value(metadata ptr %0, metadata !5590, metadata !DIExpression()), !dbg !5657
  call void @llvm.dbg.value(metadata ptr %33, metadata !5593, metadata !DIExpression()), !dbg !5657
  call void @llvm.dbg.value(metadata ptr undef, metadata !5595, metadata !DIExpression()), !dbg !5657
  %34 = getelementptr inbounds i8, ptr %33, i64 8, !dbg !5659
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 0, i64 2), ptr %33, align 8, !dbg !5659, !tbaa !4835
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 1, i64 2), ptr %34, align 8, !dbg !5659, !tbaa !4835
  %35 = getelementptr inbounds %class.bug_spawn_op.14, ptr %33, i64 0, i32 2, !dbg !5660
  store ptr %0, ptr %35, align 8, !dbg !5660, !tbaa.struct !4802
  %36 = getelementptr inbounds %class.bug_spawn_op.14, ptr %33, i64 0, i32 3, !dbg !5661
  call void @llvm.dbg.value(metadata ptr %36, metadata !4838, metadata !DIExpression()), !dbg !5662
  call void @llvm.dbg.value(metadata ptr undef, metadata !4841, metadata !DIExpression()), !dbg !5662
  store ptr %4, ptr %36, align 8, !dbg !5664
  %37 = getelementptr inbounds %class.bug_spawn_op.14, ptr %33, i64 0, i32 3, i32 1, !dbg !5665
  store ptr %27, ptr %37, align 8, !dbg !5665, !tbaa !4808
  %38 = call i32 (ptr, ...) @printf(ptr noundef nonnull @.str.15, ptr noundef nonnull %36, ptr noundef nonnull %4, ptr noundef nonnull %27), !dbg !5666
  call void @llvm.dbg.value(metadata ptr %33, metadata !5583, metadata !DIExpression()), !dbg !5654
  call void @llvm.dbg.value(metadata ptr %20, metadata !5607, metadata !DIExpression()), !dbg !5667
  %39 = atomicrmw add ptr %20, i64 1 seq_cst, align 8, !dbg !5669
  %40 = load ptr, ptr %0, align 8, !dbg !5670, !tbaa !5615
  %41 = load ptr, ptr %40, align 8, !dbg !5671, !tbaa !4835
  %42 = getelementptr inbounds ptr, ptr %41, i64 2, !dbg !5671
  %43 = load ptr, ptr %42, align 8, !dbg !5671
  call void %43(ptr noundef nonnull align 8 dereferenceable(8) %40, ptr noundef nonnull align 8 dereferenceable(8) %33), !dbg !5671
  call void @llvm.dbg.value(metadata i32 3, metadata !4950, metadata !DIExpression()), !dbg !5672
  call void @llvm.dbg.value(metadata i32 5, metadata !4951, metadata !DIExpression()), !dbg !5672
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5672
  call void @llvm.dbg.declare(metadata ptr %5, metadata !4953, metadata !DIExpression(DW_OP_plus_uconst, 16)), !dbg !5672
  call void @llvm.dbg.declare(metadata ptr %5, metadata !4964, metadata !DIExpression()), !dbg !5672
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.resume, ptr %5, align 8, !dbg !5672
  %44 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 1, !dbg !5672
  store ptr @_Z16bug_spawned_taskiiRSt6atomicIiE.cleanup, ptr %44, align 8, !dbg !5672
  %45 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 2, !dbg !5672
  %46 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 3, !dbg !5672
  store ptr @count, ptr %46, align 8, !dbg !5672
  %47 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 5, !dbg !5672
  store <2 x i32> <i32 3, i32 5>, ptr %47, align 8, !dbg !5672
  call void @llvm.dbg.value(metadata i32 3, metadata !4950, metadata !DIExpression()), !dbg !5672
  call void @llvm.dbg.value(metadata i32 5, metadata !4951, metadata !DIExpression()), !dbg !5672
  call void @llvm.dbg.value(metadata ptr @count, metadata !4952, metadata !DIExpression()), !dbg !5672
  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %45) #27, !dbg !5674
  call void @llvm.dbg.value(metadata ptr %45, metadata !4982, metadata !DIExpression()), !dbg !5675
  call void @llvm.dbg.value(metadata ptr %45, metadata !4988, metadata !DIExpression()), !dbg !5677
  call void @llvm.dbg.value(metadata ptr %45, metadata !4993, metadata !DIExpression()), !dbg !5679
  call void @llvm.dbg.value(metadata ptr %45, metadata !4999, metadata !DIExpression()), !dbg !5681
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5007, metadata !DIExpression()), !dbg !5683
  call void @llvm.dbg.value(metadata ptr %45, metadata !5011, metadata !DIExpression()), !dbg !5684
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5018, metadata !DIExpression()), !dbg !5684
  call void @llvm.dbg.value(metadata ptr %45, metadata !5022, metadata !DIExpression()), !dbg !5686
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5029, metadata !DIExpression()), !dbg !5686
  call void @llvm.dbg.value(metadata ptr %45, metadata !5033, metadata !DIExpression()), !dbg !5688
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5040, metadata !DIExpression()), !dbg !5688
  call void @llvm.dbg.value(metadata ptr %45, metadata !5044, metadata !DIExpression()), !dbg !5690
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5051, metadata !DIExpression()), !dbg !5690
  call void @llvm.dbg.value(metadata ptr %45, metadata !5055, metadata !DIExpression()), !dbg !5692
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5061, metadata !DIExpression()), !dbg !5694
  call void @llvm.dbg.value(metadata ptr %45, metadata !5066, metadata !DIExpression()), !dbg !5695
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5073, metadata !DIExpression()), !dbg !5697
  call void @llvm.dbg.value(metadata ptr %45, metadata !5078, metadata !DIExpression()), !dbg !5698
  call void @llvm.dbg.declare(metadata ptr undef, metadata !5084, metadata !DIExpression()), !dbg !5700
  call void @llvm.dbg.value(metadata ptr %45, metadata !5089, metadata !DIExpression()), !dbg !5701
  store ptr null, ptr %45, align 8, !dbg !5703, !tbaa !5095
  %48 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, !dbg !5704
  store i8 0, ptr %48, align 8, !dbg !5704, !tbaa !5097
  %49 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 2, i32 1, !dbg !5705
  call void @llvm.dbg.value(metadata ptr %49, metadata !5100, metadata !DIExpression()), !dbg !5706
  call void @llvm.dbg.value(metadata ptr null, metadata !5103, metadata !DIExpression()), !dbg !5706
  store ptr null, ptr %49, align 8, !dbg !5708, !tbaa !5107
  %50 = getelementptr inbounds %_Z16bug_spawned_taskiiRSt6atomicIiE.Frame, ptr %5, i64 0, i32 7, !dbg !5674
  store i1 false, ptr %50, align 8, !dbg !5674
  call void @llvm.dbg.value(metadata ptr %0, metadata !5579, metadata !DIExpression()), !dbg !5709
  call void @llvm.dbg.value(metadata ptr undef, metadata !5582, metadata !DIExpression()), !dbg !5709
  %51 = call noalias noundef nonnull dereferenceable(40) ptr @_Znwm(i64 noundef 40) #29, !dbg !5711
  call void @llvm.dbg.value(metadata ptr %0, metadata !5590, metadata !DIExpression()), !dbg !5712
  call void @llvm.dbg.value(metadata ptr %51, metadata !5593, metadata !DIExpression()), !dbg !5712
  call void @llvm.dbg.value(metadata ptr undef, metadata !5595, metadata !DIExpression()), !dbg !5712
  %52 = getelementptr inbounds i8, ptr %51, i64 8, !dbg !5714
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 0, i64 2), ptr %51, align 8, !dbg !5714, !tbaa !4835
  store ptr getelementptr inbounds ({ [4 x ptr], [3 x ptr] }, ptr @_ZTV12bug_spawn_opIN11bug_spawner15final_handler_tEE, i64 0, inrange i32 1, i64 2), ptr %52, align 8, !dbg !5714, !tbaa !4835
  %53 = getelementptr inbounds %class.bug_spawn_op.14, ptr %51, i64 0, i32 2, !dbg !5715
  store ptr %0, ptr %53, align 8, !dbg !5715, !tbaa.struct !4802
  %54 = getelementptr inbounds %class.bug_spawn_op.14, ptr %51, i64 0, i32 3, !dbg !5716
  call void @llvm.dbg.value(metadata ptr %54, metadata !4838, metadata !DIExpression()), !dbg !5717
  call void @llvm.dbg.value(metadata ptr undef, metadata !4841, metadata !DIExpression()), !dbg !5717
  store ptr %5, ptr %54, align 8, !dbg !5719
  %55 = getelementptr inbounds %class.bug_spawn_op.14, ptr %51, i64 0, i32 3, i32 1, !dbg !5720
  store ptr %45, ptr %55, align 8, !dbg !5720, !tbaa !4808
  %56 = call i32 (ptr, ...) @printf(ptr noundef nonnull @.str.15, ptr noundef nonnull %54, ptr noundef nonnull %5, ptr noundef nonnull %45), !dbg !5721
  call void @llvm.dbg.value(metadata ptr %51, metadata !5583, metadata !DIExpression()), !dbg !5709
  call void @llvm.dbg.value(metadata ptr %20, metadata !5607, metadata !DIExpression()), !dbg !5722
  %57 = atomicrmw add ptr %20, i64 1 seq_cst, align 8, !dbg !5724
  %58 = load ptr, ptr %0, align 8, !dbg !5725, !tbaa !5615
  %59 = load ptr, ptr %58, align 8, !dbg !5726, !tbaa !4835
  %60 = getelementptr inbounds ptr, ptr %59, i64 2, !dbg !5726
  %61 = load ptr, ptr %60, align 8, !dbg !5726
  call void %61(ptr noundef nonnull align 8 dereferenceable(8) %58, ptr noundef nonnull align 8 dereferenceable(8) %51), !dbg !5726
  %62 = call ptr @__cxa_allocate_exception(i64 16) #27, !dbg !5727
  invoke void @_ZNSt13runtime_errorC1EPKc(ptr noundef nonnull align 8 dereferenceable(16) %62, ptr noundef nonnull @.str.4)
          to label %63 unwind label %64, !dbg !5728

ChuanqiXu9 (Member) commented:

Thanks for reporting this. I might not be able to look at it soon. I'm also wondering if it is possible to reduce this further; it is still long for a minimal reproducer. That is not required, but it would be pretty helpful.

ChuanqiXu9 (Member) commented:

Note to myself: the fundamental problem behind the issue is that we didn't perform a real/strict escape analysis. We imagined that stack unwinding could take over the job of destroying the coroutine frame when the frame allocation is elided. But we missed one point: stack unwinding has different semantics from an explicit coroutine_handle<>::destroy(). The latter is explicit, so it shows the intention of the user; if a use-after-free happens after it, we can blame the user for destroying the coroutine frame incorrectly. We can't say the same about stack unwinding.
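
A minimal sketch of that distinction (hypothetical types and names, not the reporter's code): the handle escapes into a queue that outlives the caller, and the caller then unwinds. If the frame allocation had been elided onto the caller's stack, resuming from the queue later would touch dead stack memory, which is exactly the stack-use-after-return reported above.

#include <coroutine>
#include <deque>
#include <stdexcept>

// Hypothetical minimal task type for illustration: it suspends at
// initial_suspend so the caller decides when the body runs, and the frame
// frees itself at final_suspend (suspend_never).
struct fire_and_forget_task {
	struct promise_type {
		fire_and_forget_task get_return_object() {
			return { std::coroutine_handle<promise_type>::from_promise(*this) };
		}
		std::suspend_always initial_suspend() noexcept { return {}; }
		std::suspend_never final_suspend() noexcept { return {}; }
		void return_void() noexcept {}
		void unhandled_exception() noexcept {}
	};
	std::coroutine_handle<> handle;
};

std::deque<std::coroutine_handle<>> pending; // outlives caller(), drained elsewhere

fire_and_forget_task work() { co_return; }

void caller() {
	pending.push_back(work().handle); // the handle escapes the scope here
	throw std::runtime_error{ "unwind" }; // unwinding must not free the escaped frame
}
// Elsewhere (e.g. on an executor thread): pending.front().resume();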

avikivity (Contributor) commented:

Please consider backporting to 16.0.x.

EugeneZelenko (Contributor) commented:

@avikivity: 17 is the only currently maintained branch.

avikivity (Contributor) commented:

@mykaul @tchaikov this may be responsible for the problems we see with aarch64 and excessive inlining.

See https://github.com/scylladb/scylladb/blob/93be4c0cb0f0c53fe0eb7cd4d06ba15a7fc01d29/configure.py#L1417 for the workaround. Maybe we can get rid of it on the next clang release.

@nikic added this to the LLVM 17.0.X Release milestone Aug 24, 2023
@nikic reopened this Aug 24, 2023
github-project-automation bot moved this to Needs Triage in LLVM Release Status Aug 24, 2023

nikic commented Aug 24, 2023

/cherry-pick 7037331


tchaikov commented Aug 24, 2023

@mykaul @tchaikov this may be responsible for the problems we see with aarch64 and excessive inlining.

See https://github.com/scylladb/scylladb/blob/93be4c0cb0f0c53fe0eb7cd4d06ba15a7fc01d29/configure.py#L1417 for the workaround. Maybe we can get rid of it on the next clang release.

Not sure what the right number to use in -mllvm -inline-threshold=<number> is, not to mention that it would be difficult to teach CMake not to pass -mllvm ... when it uses clang++ as the driver for the archiver. I guess a better (or more dangerous) approach would probably be https://src.fedoraproject.org/rpms/llvm/pull-request/182 .
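
For context (an illustrative command shape only, not a recommended value), the workaround being discussed amounts to passing the LLVM inliner threshold through the clang driver on each compile, e.g.:

clang++ -std=c++20 -O2 -mllvm -inline-threshold=100 -c foo.cpp -o foo.o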


llvmbot commented Aug 24, 2023

/branch llvm/llvm-project-release-prs/issue59723

llvmbot pushed a commit to llvm/llvm-project-release-prs that referenced this issue Aug 24, 2023
…k coro handle unconditionally any more

Close llvm/llvm-project#59723.

The fundamental cause of the above issue is that we assumed the memory of the
coroutine frame could be released automatically by stack unwinding if the
allocation of the coroutine frame is elided. But we missed one point: stack
unwinding has different semantics from the explicit
coroutine_handle<>::destroy(). Since the latter is explicit, it shows the
intention of the user, so we can blame the user for destroying the coroutine
frame incorrectly if a use-after-free happens. We can't do so with stack
unwinding.

So after this patch, we no longer assume unconditionally that an exceptional
terminator doesn't leak the coroutine handle. Instead, we treat an exceptional
terminator as leaking the coroutine handle too if the coroutine is leaked
somewhere along the search path.

Concretely for C++, the exceptional terminator is no longer considered
special. This may cause some performance regressions, but I've tested the
motivating example (std::generator). On the other hand, coroutine elision is a
middle-end optimization and not a language feature, so we don't think such
regressions should be blamed, especially since we are correcting
miscompilations.

(cherry picked from commit 7037331a2f05990cd59f35a7c0f6ce87c0f3cb5f)
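
For contrast with the escaping case sketched earlier in the thread, a hedged illustration (reusing the hypothetical work() coroutine from above) of the situation where elision stays valid: the handle never leaves the calling scope, so freeing the frame together with the caller's stack is not observable by the user.

// Safe case for elision: the handle stays local and the coroutine runs to
// completion, so the frame's lifetime is bounded by this scope.
void local_use() {
	fire_and_forget_task t = work();
	t.handle.resume(); // runs the body; the frame is freed at final_suspend
	// t.handle is never stored anywhere that outlives this scope
}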
avikivity (Contributor) commented:


Let's discuss this on a scylladb issue so we don't bore the clang coroutine developers here.


llvmbot commented Aug 24, 2023

/pull-request llvm/llvm-project-release-prs#637

tru pushed a commit to llvm/llvm-project-release-prs that referenced this issue Aug 25, 2023
…k coro handle unconditionally any more

Close llvm/llvm-project#59723. (Same cherry-picked commit 7037331a2f05990cd59f35a7c0f6ce87c0f3cb5f and commit message as the llvmbot push above.)
@tru moved this from Needs Review to Done in LLVM Release Status Aug 25, 2023
razmser pushed commits to SuduIDE/llvm-project that referenced this issue on Oct 2, Oct 3, Oct 6, and Oct 11, 2023, each with the same commit message as above.