diff --git a/xla/service/gpu/runtime/copy_thunk.cc b/xla/service/gpu/runtime/copy_thunk.cc index ee90d4befc60d..b01c0bf581512 100644 --- a/xla/service/gpu/runtime/copy_thunk.cc +++ b/xla/service/gpu/runtime/copy_thunk.cc @@ -126,12 +126,13 @@ absl::Status DeviceToHostCopyThunk::ExecuteOnStream( TF_ASSIGN_OR_RETURN( se::Stream * stream, GetStreamForExecution(Thunk::execution_stream_id(), params)); + TF_RETURN_IF_ERROR(stream->WaitFor(params.stream)); TF_RETURN_IF_ERROR(stream->Memcpy(cpu_dst, source_data, size_bytes())); if (stream == params.stream) { VLOG(2) << "Memcpy D2H from the main stream"; return absl::OkStatus(); } - VLOG(2) << "Memcpy D2H from the other stream"; + VLOG(2) << "Memcpy D2H from the stream " << Thunk::execution_stream_id(); se::StreamExecutor* executor = params.stream->parent(); se::Event event(executor); if (!event.Init()) {