Skip to content

Commit 957568d

Browse files
Kunchddavikisrabbani
authored
[Core] Remove reference counter mock for real reference counter in testing (#57178)
This commit takes a step towards utilizing real classes during unit testing instead of mocks for components that are simple and local (e.g. reference counter). We remove the reference counter mock as promised in #57177 (review). By directly testing on the real reference_counter, we are able to catch a behavior mismatch between AddNewActorHandle and its doc described behavior. Specifically, the EmplaceNewActorHandle doc states that the function should return false instead of failing due to ray check and the test tests exactly that. However, due to calling AddOwnedObject, EmplaceNewActorHandle actually ray check fails. The commit addresses this issue by making AddNewActorHandle idempotent. --------- Signed-off-by: davik <davik@anyscale.com> Signed-off-by: Kunchen (David) Dai <54918178+Kunchd@users.noreply.github.com> Co-authored-by: davik <davik@anyscale.com> Co-authored-by: Ibrahim Rabbani <irabbani@anyscale.com> Co-authored-by: Ibrahim Rabbani <israbbani@gmail.com>
1 parent 93dad3b commit 957568d

File tree

12 files changed

+130
-260
lines changed

12 files changed

+130
-260
lines changed

src/mock/ray/core_worker/reference_counter.h

Lines changed: 0 additions & 218 deletions
This file was deleted.

src/ray/core_worker/actor_manager.cc

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -80,10 +80,10 @@ std::pair<std::shared_ptr<const ActorHandle>, Status> ActorManager::GetNamedActo
8080
if (status.ok()) {
8181
auto actor_handle = std::make_unique<ActorHandle>(actor_table_data, task_spec);
8282
actor_id = actor_handle->GetActorID();
83-
AddNewActorHandle(std::move(actor_handle),
84-
call_site,
85-
caller_address,
86-
/*owned*/ false);
83+
EmplaceNewActorHandle(std::move(actor_handle),
84+
call_site,
85+
caller_address,
86+
/*owned*/ false);
8787
} else {
8888
// Use a NIL actor ID to signal that the actor wasn't found.
8989
RAY_LOG(DEBUG) << "Failed to look up actor with name: " << name;
@@ -129,16 +129,24 @@ std::shared_ptr<ActorHandle> ActorManager::GetActorHandleIfExists(
129129
return nullptr;
130130
}
131131

132-
bool ActorManager::AddNewActorHandle(std::unique_ptr<ActorHandle> actor_handle,
133-
const std::string &call_site,
134-
const rpc::Address &caller_address,
135-
bool owned) {
132+
bool ActorManager::EmplaceNewActorHandle(std::unique_ptr<ActorHandle> actor_handle,
133+
const std::string &call_site,
134+
const rpc::Address &caller_address,
135+
bool owned) {
136136
const auto &actor_id = actor_handle->GetActorID();
137137
const auto actor_creation_return_id = ObjectID::ForActorHandle(actor_id);
138+
139+
{
140+
absl::MutexLock lock(&mutex_);
141+
if (actor_handles_.contains(actor_id)) {
142+
return false;
143+
}
144+
}
145+
138146
// Detached actor doesn't need ref counting.
139147
if (owned) {
140148
reference_counter_.AddOwnedObject(actor_creation_return_id,
141-
/*inner_ids=*/{},
149+
/*contained_ids=*/{},
142150
caller_address,
143151
call_site,
144152
/*object_size*/ -1,

src/ray/core_worker/actor_manager.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class ActorManager {
106106
///
107107
/// NOTE: Getting an actor handle from GCS (named actor) is considered as adding a new
108108
/// actor handle.
109+
/// NOTE: Attempting to add the same actor in parallel can cause RAY CHECK failure.
109110
///
110111
/// \param actor_handle The handle to the actor.
111112
/// \param[in] call_site The caller's site.
@@ -114,10 +115,10 @@ class ActorManager {
114115
/// task.
115116
/// \return True if the handle was added and False if we already had a handle to
116117
/// the same actor.
117-
bool AddNewActorHandle(std::unique_ptr<ActorHandle> actor_handle,
118-
const std::string &call_site,
119-
const rpc::Address &caller_address,
120-
bool owned);
118+
bool EmplaceNewActorHandle(std::unique_ptr<ActorHandle> actor_handle,
119+
const std::string &call_site,
120+
const rpc::Address &caller_address,
121+
bool owned);
121122

122123
/// Wait for actor reference deletion.
123124
///

src/ray/core_worker/core_worker.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,9 +2164,11 @@ Status CoreWorker::CreateActor(const RayFunction &function,
21642164
// Add the actor handle before we submit the actor creation task, since the
21652165
// actor handle must be in scope by the time the GCS sends the
21662166
// WaitForActorRefDeletedRequest.
2167-
RAY_CHECK(actor_manager_->AddNewActorHandle(
2167+
RAY_CHECK(actor_manager_->EmplaceNewActorHandle(
21682168
std::move(actor_handle), CurrentCallSite(), rpc_address_, /*owned=*/!is_detached))
2169-
<< "Actor " << actor_id << " already exists";
2169+
<< "Attempt to emplace new actor handle for the actor being created with actor id: "
2170+
<< actor_id
2171+
<< " failed because an actor handle with the same actor id has already been added";
21702172
*return_actor_id = actor_id;
21712173
TaskSpecification task_spec = std::move(builder).ConsumeAndBuild();
21722174
RAY_LOG(DEBUG) << "Submitting actor creation task " << task_spec.DebugString();

src/ray/core_worker/core_worker.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,9 @@ class CoreWorker {
865865

866866
/// Create an actor.
867867
///
868+
/// NOTE: RAY CHECK fails if an actor handle with the same actor id has already been
869+
/// added, or if the scheduling strategy for actor creation is not set.
870+
///
868871
/// \param[in] caller_id ID of the task submitter.
869872
/// \param[in] function The remote function that generates the actor object.
870873
/// \param[in] args Arguments of this task.

src/ray/core_worker/reference_counter_interface.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,8 @@ class ReferenceCounterInterface {
125125
/// owner ID will change for workers executing normal tasks and it is
126126
/// possible to have leftover references after a task has finished.
127127
///
128+
/// NOTE: RAY CHECK fails if the object was already added.
129+
///
128130
/// \param[in] object_id The ID of the object that we own.
129131
/// \param[in] contained_ids ObjectIDs that are contained in the object's value.
130132
/// As long as the object_id is in scope, the inner objects should not be GC'ed.

src/ray/core_worker/task_submission/tests/BUILD.bazel

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,10 @@ ray_cc_test(
3434
tags = ["team:core"],
3535
deps = [
3636
"//:ray_mock",
37+
"//src/ray/core_worker:reference_counter",
3738
"//src/ray/core_worker/task_submission:actor_task_submitter",
39+
"//src/ray/pubsub:fake_publisher",
40+
"//src/ray/pubsub:fake_subscriber",
3841
"@com_google_googletest//:gtest",
3942
"@com_google_googletest//:gtest_main",
4043
],
@@ -54,6 +57,8 @@ ray_cc_test(
5457
"//src/ray/core_worker:reference_counter",
5558
"//src/ray/core_worker:task_manager",
5659
"//src/ray/core_worker_rpc_client:fake_core_worker_client",
60+
"//src/ray/pubsub:fake_publisher",
61+
"//src/ray/pubsub:fake_subscriber",
5762
"@com_google_googletest//:gtest",
5863
"@com_google_googletest//:gtest_main",
5964
],

0 commit comments

Comments
 (0)