Skip to content

Commit 69d14eb

Browse files
committed
Don't loop forever on errors writing installinator slots
Currently, if there's a permanent error while writing to the M.2 drives, we may loop/retry forever. This isn't great behavior, so attempt to break out of the retry loop if it looks like we aren't making progress writing.
1 parent c51ff72 commit 69d14eb

File tree

1 file changed

+19
-5
lines changed

1 file changed

+19
-5
lines changed

installinator/src/write.rs

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,10 @@ impl<'a> ArtifactWriter<'a> {
281281

282282
// How many drives did we finish writing during the previous iteration?
283283
let mut success_prev_iter = 0;
284+
// Check if we've had the same number of successes as the previous
285+
// iteration, which can be a sign that something in the write path
286+
// has a permanent error
287+
let mut same_successes = false;
284288

285289
loop {
286290
// How many drives did we finish writing during this iteration?
@@ -350,7 +354,13 @@ impl<'a> ArtifactWriter<'a> {
350354
// 2. At least one drive was successfully written on a previous
351355
// iteration, which implies all other drives got to retry during
352356
// this iteration.
353-
if success_this_iter == self.drives.len() || success_prev_iter > 0 {
357+
// 3. We had the same number of successes as the previous iteration,
358+
// which implies that we seem to be permanently stuck and unlikely
359+
// to succeed
360+
if success_this_iter == self.drives.len()
361+
|| success_prev_iter > 0
362+
|| same_successes == true
363+
{
354364
break;
355365
}
356366

@@ -364,6 +374,10 @@ impl<'a> ArtifactWriter<'a> {
364374
// Give it a short break, then keep trying.
365375
tokio::time::sleep(Duration::from_secs(5)).await;
366376

377+
if success_this_iter == success_prev_iter {
378+
same_successes = true;
379+
}
380+
367381
success_prev_iter = success_this_iter;
368382
}
369383

@@ -1157,17 +1171,17 @@ mod tests {
11571171
// image, we return two concatenated lists of "fails then one success".
11581172
let success_strategy_host = prop::collection::vec(
11591173
partial_op_strategy(interrupted_would_block_strategy(), 1024),
1160-
0..16,
1174+
0..1,
11611175
);
11621176
let success_strategy_control_plane = prop::collection::vec(
11631177
partial_op_strategy(interrupted_would_block_strategy(), 1024),
1164-
0..16,
1178+
0..1,
11651179
);
11661180

11671181
(
1168-
0..16usize,
1182+
0..1usize,
11691183
success_strategy_host,
1170-
0..16usize,
1184+
0..1usize,
11711185
success_strategy_control_plane,
11721186
)
11731187
.prop_map(

0 commit comments

Comments
 (0)