Commit a95ac04

israbbani authored and edoakes committed
[core] (ray-get 2/n) Making ray.get thread-safe (#57911)
This PR makes the `ray.get` public API thread-safe. It also cleans up a lot of tech debt w.r.t.:

* Workers yielding CPU to the raylet when blocked.
* Cleaning up finished/in-flight Get requests.

Previously, the raylet coalesced all Get requests from the same worker into one Get (and Pull) request. However, Get request cleanup could happen on multiple threads, meaning **one thread could cancel in-flight Get requests for all threads in a worker**. This issue was reported in #54007.

### Changes in this PR

Raylet (server-side):

1. AsyncGetObjects will return a request_id.
2. LeaseDependencyManager no longer coalesces AsyncGetObjects requests from the same worker.
3. LeaseDependencyManager has two methods for cleanup: delete all requests for a worker (during worker disconnect/lease cleanup) and delete a specific request (called through CancelGetRequest).
4. Wait no longer cancels all Get requests for the worker (this was probably a bug).
5. NotifyWorkerUnblocked no longer cancels Get requests.

CoreWorker (client-side):

1. PlasmaStoreProvider::Get makes one call to AsyncGetObjects per batch.
2. PlasmaStoreProvider::Get stores scoped cleanup handlers that call CancelGetRequest for each AsyncGetObjects call, guaranteeing RAII-style cleanup.

Closes #54007.

---------

Signed-off-by: irabbani <israbbani@gmail.com>
Signed-off-by: Edward Oakes <ed.nmi.oakes@gmail.com>
Co-authored-by: Edward Oakes <ed.nmi.oakes@gmail.com>
Signed-off-by: elliot-barn <elliot.barnwell@anyscale.com>
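To make the RAII-style cleanup in the CoreWorker changes concrete, here is a minimal, self-contained sketch of the pattern. `IpcClient`, `ScopedGetRequest`, and all member names below are hypothetical stand-ins for illustration, not Ray's actual `ipc::ScopedResponse` implementation:

```cpp
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

// Hypothetical stand-in for the raylet IPC client.
class IpcClient {
 public:
  // Returns a fresh id per request, mirroring the new AsyncGetObjectsReply.
  int64_t AsyncGetObjects() { return next_request_id_++; }
  void CancelGetRequest(int64_t request_id) {
    std::cout << "canceling get request " << request_id << "\n";
  }

 private:
  int64_t next_request_id_ = 0;
};

// RAII handle: cancels exactly one get request when it goes out of scope.
class ScopedGetRequest {
 public:
  ScopedGetRequest(IpcClient *client, int64_t request_id)
      : client_(client), request_id_(request_id) {}

  // Move-only, so the cancel fires exactly once even if the handle is
  // moved into a container.
  ScopedGetRequest(ScopedGetRequest &&other) noexcept
      : client_(std::exchange(other.client_, nullptr)),
        request_id_(other.request_id_) {}
  ScopedGetRequest(const ScopedGetRequest &) = delete;
  ScopedGetRequest &operator=(const ScopedGetRequest &) = delete;

  ~ScopedGetRequest() {
    if (client_ != nullptr) {
      client_->CancelGetRequest(request_id_);
    }
  }

 private:
  IpcClient *client_;
  int64_t request_id_;
};

int main() {
  IpcClient client;
  {
    std::vector<ScopedGetRequest> cleanup_handlers;
    // One AsyncGetObjects per batch; each gets its own cleanup handler.
    cleanup_handlers.emplace_back(&client, client.AsyncGetObjects());
    cleanup_handlers.emplace_back(&client, client.AsyncGetObjects());
    // Any early return or thrown exception here still runs the destructors.
  }  // Only the two requests created in this scope are canceled here.
  return 0;
}
```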
1 parent 6d33dd3 · commit a95ac04

16 files changed: +418, -294 lines

python/ray/tests/BUILD.bazel

Lines changed: 1 addition & 0 deletions
@@ -884,6 +884,7 @@ py_test_module_list(
         "test_dataclient_disconnect.py",
         "test_iter.py",
         "test_placement_group.py",
+        "test_ray_get.py",
         "test_state_api_2.py",
         "test_task_events.py",
         "test_unavailable_actors.py",

python/ray/tests/test_draining.py

Lines changed: 15 additions & 8 deletions
@@ -427,14 +427,21 @@ def ping(self):

     # Simulate autoscaler terminates the worker node after the draining deadline.
     cluster.remove_node(node2, graceful)
-    try:
-        ray.get(actor.ping.remote())
-        raise
-    except ray.exceptions.ActorDiedError as e:
-        assert e.preempted
-        if graceful:
-            assert "The actor died because its node has died." in str(e)
-            assert "the actor's node was preempted: " + drain_reason_message in str(e)
+
+    def check_actor_died_error():
+        try:
+            ray.get(actor.ping.remote())
+            return False
+        except ray.exceptions.ActorDiedError as e:
+            assert e.preempted
+            if graceful:
+                assert "The actor died because its node has died." in str(e)
+                assert "the actor's node was preempted: " + drain_reason_message in str(
+                    e
+                )
+            return True
+
+    wait_for_condition(check_actor_died_error)


 def test_drain_node_actor_restart(ray_start_cluster):

python/ray/tests/test_ray_get.py

Lines changed: 76 additions & 0 deletions
@@ -0,0 +1,76 @@
+import sys
+import threading
+import time
+
+import numpy as np
+import pytest
+
+import ray
+
+
+def test_multithreaded_ray_get(ray_start_cluster):
+    # This test tries to get a large object from the head node to the worker node
+    # while making many concurrent ray.get requests for a local object in plasma.
+    # TODO(57923): Make this not rely on timing if possible.
+    ray_cluster = ray_start_cluster
+    ray_cluster.add_node(
+        # This will make the object transfer slower and allow the test to
+        # interleave Get requests.
+        _system_config={
+            "object_manager_max_bytes_in_flight": 1024**2,
+        }
+    )
+    ray.init(address=ray_cluster.address)
+    ray_cluster.add_node(resources={"worker": 1})
+
+    # max_concurrency >= 3 is required: one thread for small gets, one for large gets,
+    # one for setting the threading.Events.
+    @ray.remote(resources={"worker": 1}, max_concurrency=3)
+    class Actor:
+        def __init__(self):
+            # ray.put will ensure that the object is in plasma
+            # even if it's small.
+            self._local_small_ref = ray.put("1")
+
+            # Used to check the thread running the small `ray.gets` has made at least
+            # one API call successfully.
+            self._small_gets_started = threading.Event()
+
+            # Used to tell the thread running small `ray.gets` to exit.
+            self._stop_small_gets = threading.Event()
+
+        def small_gets_started(self):
+            self._small_gets_started.wait()
+
+        def stop_small_gets(self):
+            self._stop_small_gets.set()
+
+        def do_small_gets(self):
+            while not self._stop_small_gets.is_set():
+                ray.get(self._local_small_ref)
+                time.sleep(0.01)
+                self._small_gets_started.set()
+
+        def do_large_get(self, refs_to_get):
+            remote_large_ref = refs_to_get[0]
+            ray.get(remote_large_ref)
+
+    actor = Actor.remote()
+
+    # Start a task on one thread that will repeatedly call `ray.get` on small
+    # plasma objects.
+    small_gets_ref = actor.do_small_gets.remote()
+    ray.get(actor.small_gets_started.remote())
+
+    # Start a second task on another thread that will call `ray.get` on a large object.
+    # The transfer will be slow due to the system config set above.
+    large_ref = ray.put(np.ones(1024**3, dtype=np.int8))
+    ray.get(actor.do_large_get.remote([large_ref]))
+
+    # Check that all `ray.get` calls succeeded.
+    ray.get(actor.stop_small_gets.remote())
+    ray.get(small_gets_ref)
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main(["-sv", __file__]))

src/ray/core_worker/store_provider/plasma_store_provider.cc

Lines changed: 44 additions & 34 deletions
@@ -15,6 +15,7 @@
 #include "ray/core_worker/store_provider/plasma_store_provider.h"

 #include <algorithm>
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <utility>
@@ -177,23 +178,20 @@ Status CoreWorkerPlasmaStoreProvider::Release(const ObjectID &object_id) {
   return store_client_->Release(object_id);
 }

-Status CoreWorkerPlasmaStoreProvider::PullObjectsAndGetFromPlasmaStore(
+Status CoreWorkerPlasmaStoreProvider::GetObjectsFromPlasmaStore(
     absl::flat_hash_set<ObjectID> &remaining,
-    const std::vector<ObjectID> &batch_ids,
+    const std::vector<ObjectID> &ids,
     int64_t timeout_ms,
     absl::flat_hash_map<ObjectID, std::shared_ptr<RayObject>> *results,
     bool *got_exception) {
-  const auto owner_addresses = reference_counter_.GetOwnerAddresses(batch_ids);
-  RAY_RETURN_NOT_OK(raylet_ipc_client_->AsyncGetObjects(batch_ids, owner_addresses));
-
   std::vector<plasma::ObjectBuffer> plasma_results;
-  RAY_RETURN_NOT_OK(store_client_->Get(batch_ids, timeout_ms, &plasma_results));
+  RAY_RETURN_NOT_OK(store_client_->Get(ids, timeout_ms, &plasma_results));

   // Add successfully retrieved objects to the result map and remove them from
   // the set of IDs to get.
   for (size_t i = 0; i < plasma_results.size(); i++) {
     if (plasma_results[i].data != nullptr || plasma_results[i].metadata != nullptr) {
-      const auto &object_id = batch_ids[i];
+      const auto &object_id = ids[i];
       std::shared_ptr<TrackedBuffer> data = nullptr;
       std::shared_ptr<Buffer> metadata = nullptr;
       if (plasma_results[i].data && plasma_results[i].data->Size() > 0) {
@@ -216,7 +214,6 @@ Status CoreWorkerPlasmaStoreProvider::PullObjectsAndGetFromPlasmaStore(
       (*results)[object_id] = std::move(result_object);
     }
   }
-
   return Status::OK();
 }

@@ -258,37 +255,48 @@ Status CoreWorkerPlasmaStoreProvider::Get(
     const absl::flat_hash_set<ObjectID> &object_ids,
     int64_t timeout_ms,
     absl::flat_hash_map<ObjectID, std::shared_ptr<RayObject>> *results) {
-  std::vector<ObjectID> batch_ids;
-  absl::flat_hash_set<ObjectID> remaining(object_ids.begin(), object_ids.end());
+  std::vector<ipc::ScopedResponse> get_request_cleanup_handlers;

-  // Send initial requests to pull all objects in parallel.
-  std::vector<ObjectID> id_vector(object_ids.begin(), object_ids.end());
-  int64_t total_size = static_cast<int64_t>(object_ids.size());
   bool got_exception = false;
-  for (int64_t start = 0; start < total_size; start += fetch_batch_size_) {
+  absl::flat_hash_set<ObjectID> remaining(object_ids.begin(), object_ids.end());
+  std::vector<ObjectID> id_vector(object_ids.begin(), object_ids.end());
+  std::vector<ObjectID> batch_ids;
+
+  int64_t num_total_objects = static_cast<int64_t>(object_ids.size());
+
+  // TODO(57923): Need to understand if batching is necessary. If it's necessary,
+  // then the reason needs to be documented.
+  for (int64_t start = 0; start < num_total_objects; start += fetch_batch_size_) {
     batch_ids.clear();
-    for (int64_t i = start; i < start + fetch_batch_size_ && i < total_size; i++) {
+    for (int64_t i = start; i < start + fetch_batch_size_ && i < num_total_objects; i++) {
       batch_ids.push_back(id_vector[i]);
     }
+
+    // 1. Make the request to pull all objects into local plasma if not local already.
+    std::vector<rpc::Address> owner_addresses =
+        reference_counter_.GetOwnerAddresses(batch_ids);
+    StatusOr<ipc::ScopedResponse> status_or_cleanup =
+        raylet_ipc_client_->AsyncGetObjects(batch_ids, owner_addresses);
+    RAY_RETURN_NOT_OK(status_or_cleanup.status());
+    get_request_cleanup_handlers.emplace_back(std::move(status_or_cleanup.value()));
+
+    // 2. Try to Get all objects that are already local from the plasma store.
     RAY_RETURN_NOT_OK(
-        PullObjectsAndGetFromPlasmaStore(remaining,
-                                         batch_ids,
-                                         /*timeout_ms=*/0,
-                                         // Mutable objects must be local before ray.get.
-                                         results,
-                                         &got_exception));
+        GetObjectsFromPlasmaStore(remaining,
+                                  batch_ids,
+                                  /*timeout_ms=*/0,
+                                  // Mutable objects must be local before ray.get.
+                                  results,
+                                  &got_exception));
   }

-  // If all objects were fetched successfully or if any of the returned
-  // objects contain an exception, clean up the Get request in the raylet
-  // and early exit.
   if (remaining.empty() || got_exception) {
-    return raylet_ipc_client_->CancelGetRequest();
+    return Status::OK();
   }

-  // If not all objects were successfully fetched, repeatedly call FetchOrReconstruct
-  // and Get from the local object store in batches. This loop will run indefinitely
-  // until the objects are all fetched if timeout is -1.
+  // 3. If not all objects were successfully fetched, repeatedly call
+  // GetObjectsFromPlasmaStore in batches. This loop will run indefinitely until the
+  // objects are all fetched if timeout is -1.
   bool should_break = false;
   bool timed_out = false;
   int64_t remaining_timeout = timeout_ms;
@@ -312,7 +320,7 @@ Status CoreWorkerPlasmaStoreProvider::Get(
     }

     size_t previous_size = remaining.size();
-    RAY_RETURN_NOT_OK(PullObjectsAndGetFromPlasmaStore(
+    RAY_RETURN_NOT_OK(GetObjectsFromPlasmaStore(
         remaining, batch_ids, batch_timeout, results, &got_exception));
     should_break = timed_out || got_exception;

@@ -322,7 +330,6 @@ Status CoreWorkerPlasmaStoreProvider::Get(
     if (check_signals_) {
       Status status = check_signals_();
       if (!status.ok()) {
-        RAY_RETURN_NOT_OK(raylet_ipc_client_->CancelGetRequest());
         return status;
       }
     }
@@ -337,11 +344,14 @@ Status CoreWorkerPlasmaStoreProvider::Get(
   }

   if (!remaining.empty() && timed_out) {
-    RAY_RETURN_NOT_OK(raylet_ipc_client_->CancelGetRequest());
-    return Status::TimedOut("Get timed out: some object(s) not ready.");
+    return Status::TimedOut(absl::StrFormat(
+        "Could not fetch %d objects within the timeout of %dms. %d objects were not "
+        "ready.",
+        object_ids.size(),
+        timeout_ms,
+        remaining.size()));
   }
-
-  return raylet_ipc_client_->CancelGetRequest();
+  return Status::OK();
 }

 Status CoreWorkerPlasmaStoreProvider::Contains(const ObjectID &object_id,
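Worth noting in the diff above: `get_request_cleanup_handlers` is declared at the top of `Get` and destroyed only when `Get` returns, so every `AsyncGetObjects` request issued in the batching loop is cleaned up on all exit paths (success, timeout, signal-check failure, or exception), replacing the explicit `CancelGetRequest` calls that each early-return path previously had to make.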

src/ray/core_worker/store_provider/plasma_store_provider.h

Lines changed: 7 additions & 7 deletions
@@ -218,12 +218,12 @@ class CoreWorkerPlasmaStoreProvider {
   std::shared_ptr<plasma::PlasmaClientInterface> &store_client() { return store_client_; }

  private:
-  /// Ask the raylet to pull a set of objects and then attempt to get them
-  /// from the local plasma store. Successfully fetched objects will be removed
-  /// from the input set of remaining IDs and added to the results map.
+  /// Ask the plasma store to return objects within the timeout.
+  /// Successfully fetched objects will be removed from the input set of remaining IDs
+  /// and added to the results map.
   ///
   /// \param[in/out] remaining IDs of the remaining objects to get.
-  /// \param[in] batch_ids IDs of the objects to get.
+  /// \param[in] ids IDs of the objects to get.
   /// \param[in] timeout_ms Timeout in milliseconds.
   /// \param[out] results Map of objects to write results into. This method will only
   /// add to this map, not clear or remove from it, so the caller can pass in a non-empty
@@ -232,10 +232,10 @@ class CoreWorkerPlasmaStoreProvider {
   /// exception.
   /// \return Status::IOError if there is an error in communicating with the raylet or the
   /// plasma store.
-  /// \return Status::OK otherwise.
-  Status PullObjectsAndGetFromPlasmaStore(
+  /// \return Status::OK if successful.
+  Status GetObjectsFromPlasmaStore(
       absl::flat_hash_set<ObjectID> &remaining,
-      const std::vector<ObjectID> &batch_ids,
+      const std::vector<ObjectID> &ids,
       int64_t timeout_ms,
      absl::flat_hash_map<ObjectID, std::shared_ptr<RayObject>> *results,
      bool *got_exception);

src/ray/flatbuffers/node_manager.fbs

Lines changed: 9 additions & 4 deletions
@@ -39,15 +39,15 @@ enum MessageType:int {
   // The client should block until it receives this message before closing the socket.
   DisconnectClientReply,
   // Request the Raylet to pull a set of objects to the local node.
-  // a raylet.
   AsyncGetObjectsRequest,
-  // Cancel outstanding get requests from the worker.
+  // Reply contains the request id that will be used to clean up the request.
+  AsyncGetObjectsReply,
+  // Cleanup a given get request on the raylet.
   CancelGetRequest,
   // Notify the current worker is blocked for objects to become available. The raylet
   // will release the worker's resources.
   NotifyWorkerBlocked,
-  // Notify the current worker is unblocked. The raylet will cancel any inflight
-  // pull requests for objects.
+  // Notify the current worker is unblocked.
   NotifyWorkerUnblocked,
   // Wait for objects to be ready either from local or remote Plasma stores.
   WaitRequest,
@@ -136,7 +136,12 @@ table AsyncGetObjectsRequest {
   owner_addresses: [Address];
 }

+table AsyncGetObjectsReply {
+  request_id: long;
+}
+
 table CancelGetRequest {
+  request_id: long;
 }

 table NotifyWorkerBlocked {
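This schema change is what scopes cancellation to a single request: the worker receives a `request_id` in `AsyncGetObjectsReply` and echoes it back in `CancelGetRequest`, so the raylet tears down only that request rather than every outstanding Get for the worker, which is what previously let one thread cancel another thread's in-flight gets.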
