Skip to content

Commit

Permalink
Fix flaky test linux://python/ray/dag:tests/experimental/test_mocked_…
Browse files Browse the repository at this point in the history
…nccl_dag (ray-project#48433)

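Reduce the number of actors needed by the test_mocked_nccl_dag tests: each test case now creates a single barrier actor named barrier-{lower rank}-{higher rank} (i.e. barrier-0-1) instead of one barrier per direction.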

Closes ray-project#48288.

Signed-off-by: Stephanie Wang <smwang@cs.washington.edu>
Signed-off-by: mohitjain2504 <mohit.jain@dream11.com>
  • Loading branch information
stephanie-wang authored and mohitjain2504 committed Nov 15, 2024
1 parent 4705762 commit d5cd4f8
Showing 1 changed file with 12 additions and 32 deletions.
44 changes: 12 additions & 32 deletions python/ray/dag/tests/experimental/test_mocked_nccl_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,8 @@ def test_p2p(ray_start_cluster):
Test simple sender -> receiver pattern. Check that receiver receives
correct results.
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down Expand Up @@ -115,8 +112,6 @@ def test_p2p(ray_start_cluster):
ref = compiled_dag.execute(i, shape=shape, dtype=dtype, send_as_dict=True)
assert ray.get(ref) == (i, shape, dtype)

ray.kill(barrier1)
ray.kill(barrier2)
compiled_dag.teardown()


Expand All @@ -138,11 +133,8 @@ def test_p2p_static_shape(ray_start_cluster, send_as_dict):
_static_shape=True. If sender always sends tensors of
the same shape, then it works.
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down Expand Up @@ -181,11 +173,8 @@ def test_p2p_static_shape_error(capsys, ray_start_cluster, send_as_dict):
Test that when static_shape=True, an error is thrown when a tensor with a
different shape or dtype is found.
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down Expand Up @@ -241,11 +230,8 @@ def test_p2p_direct_return(ray_start_cluster):
"""
Test simple sender -> receiver pattern with _direct_return=True
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down Expand Up @@ -286,11 +272,8 @@ def test_p2p_direct_return_error(capsys, ray_start_cluster):
_direct_return=True. Test that error is thrown when
actor task does not return a tensor directly.
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down Expand Up @@ -355,11 +338,8 @@ def test_p2p_static_shape_and_direct_return(
are passed (check_static_shape=True) OR if non-tensor value is returned
(check_static_shape=False).
"""
# Barrier name should be barrier-{sender rank}-{receiver rank}.
# Create a barrier in both directions because we don't know which rank will
# get assigned to sender and receiver.
barrier1 = Barrier.options(name="barrier-0-1").remote() # noqa
barrier2 = Barrier.options(name="barrier-1-0").remote() # noqa
# Barrier name should be barrier-{lower rank}-{higher rank}.
barrier = Barrier.options(name="barrier-0-1").remote() # noqa

sender = MockedWorker.remote()
receiver = MockedWorker.remote()
Expand Down

0 comments on commit d5cd4f8

Please sign in to comment.