diff --git a/deepspeed/runtime/pipe/engine.py b/deepspeed/runtime/pipe/engine.py index a080559b1a2a..d4b2d0529dbf 100644 --- a/deepspeed/runtime/pipe/engine.py +++ b/deepspeed/runtime/pipe/engine.py @@ -1009,7 +1009,7 @@ def _exec_send_grads(self, buffer_id): # a grad that needs to be communicated. We free the buffer immediately # after, so no need to restore it. The receiver also has a hack that skips # the recv. This is because NCCL does not let us send torch.BoolTensor :-(. - if self.has_attention_mask or self.has_bool_tensors: + if len(inputs) > 2 and (self.has_attention_mask or self.has_bool_tensors): inputs = list(inputs) inputs.pop() inputs = tuple(inputs)