Skip to content

Commit ed47e1e

Browse files
[1.21 cherry-pick] Fix async callback ordering (#1023) (#1028)
Cherry-pick of #1023.

Co-authored-by: Michał Kuligowski <mkuligowski@habana.ai>
1 parent 5a9ddfd commit ed47e1e

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

vllm/worker/hpu_model_runner.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2725,6 +2725,8 @@ def try_revert_dummy_output_tokens():
27252725

27262726
if use_delayed_sampling:
27272727
fake_output = self._delayed_sampler_outputs(model_input)
2728+
elif model_input.async_callback is not None:
2729+
model_input.async_callback()
27282730

27292731
with self.profiler.record_event(
27302732
'internal', ('sample_'
@@ -2746,7 +2748,8 @@ def try_revert_dummy_output_tokens():
27462748
self.cached_step_outputs.append(output)
27472749
self.cached_step_inputs.append(model_input)
27482750
htorch.core.mark_step()
2749-
if model_input.async_callback is not None:
2751+
if use_delayed_sampling \
2752+
and model_input.async_callback is not None:
27502753
model_input.async_callback()
27512754
if i < num_steps - 1:
27522755
if i == 0:

0 commit comments

Comments
 (0)