diff --git a/deepspeed/runtime/pipe/engine.py b/deepspeed/runtime/pipe/engine.py
index a080559b1a2a..d4b2d0529dbf 100644
--- a/deepspeed/runtime/pipe/engine.py
+++ b/deepspeed/runtime/pipe/engine.py
@@ -1009,7 +1009,7 @@ def _exec_send_grads(self, buffer_id):
         # a grad that needs to be communicated. We free the buffer immediately
         # after, so no need to restore it. The receiver also has a hack that skips
         # the recv. This is because NCCL does not let us send torch.BoolTensor :-(.
-        if self.has_attention_mask or self.has_bool_tensors:
+        if len(inputs) > 2 and (self.has_attention_mask or self.has_bool_tensors):
             inputs = list(inputs)
             inputs.pop()
             inputs = tuple(inputs)