-
-
Notifications
You must be signed in to change notification settings - Fork 719
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactoring gather_dep / Remove missing data message #6544
Changes from all commits
526144e
7ca09d7
e4f0ea1
9128b8e
0866450
df4f680
93a4fd1
d020453
43fc518
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -647,3 +647,39 @@ async def test_fetch_to_missing_on_refresh_who_has(c, s, w1, w2, w3): | |
assert w3.tasks["x"].state == "missing" | ||
assert w3.tasks["y"].state == "flight" | ||
assert w3.tasks["y"].who_has == {w2.address} | ||
|
||
|
||
@gen_cluster(client=True, nthreads=[("", 1)]) | ||
async def test_fetch_to_missing_on_network_failure(c, s, a): | ||
""" | ||
1. Two tasks, x and y, are respectively in flight and fetch state from the same | ||
worker, which holds the only replica of both. | ||
2. gather_dep for x returns GatherDepNetworkFailureEvent | ||
3. The event empties has_what, x.who_has, and y.who_has; it recommends a transition | ||
to missing for both x and y. | ||
4. Before the recommendation can be implemented, the same event invokes | |
_ensure_communicating, which pops y from data_needed - but y has an empty | |
who_has, which is an exceptional situation. | |
5. The fetch->missing transition is executed, but y is no longer in data_needed - | |
another exceptional situation. | |
""" | ||
block_get_data = asyncio.Event() | ||
|
||
class BlockedBreakingWorker(Worker): | ||
async def get_data(self, comm, *args, **kwargs): | ||
await block_get_data.wait() | ||
raise OSError("fake error") | ||
|
||
async with BlockedBreakingWorker(s.address) as b: | ||
x = c.submit(inc, 1, key="x", workers=[b.address]) | ||
y = c.submit(inc, 2, key="y", workers=[b.address]) | ||
await wait([x, y]) | ||
s.request_acquire_replicas(a.address, ["x"], stimulus_id="test_x") | ||
await wait_for_state("x", "flight", a) | ||
s.request_acquire_replicas(a.address, ["y"], stimulus_id="test_y") | ||
await wait_for_state("y", "fetch", a) | ||
|
||
block_get_data.set() | ||
|
||
await wait_for_state("x", "missing", a) | ||
# await wait_for_state("y", "missing", a) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. y is not reliably transitioned to missing since it is stuck in fetch, depending on how fast the RefreshWhoHas comes in. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This test is a bit nonsensical. I don't think we should be allowed to simply mess with the state and expect the system to recover.