diff --git a/deepspeed/inference/engine.py b/deepspeed/inference/engine.py
index 5d54035a39fc..94c63e13329d 100755
--- a/deepspeed/inference/engine.py
+++ b/deepspeed/inference/engine.py
@@ -598,4 +598,12 @@ def _generate(self, *inputs, **kwargs):
             raise NotImplementedError("DeepSpeed does not support `num_beams` > 1, if this is important to you please "
                                       "add your request to: https://github.com/microsoft/DeepSpeed/issues/2506")
 
+        if ("input_ids" in kwargs) and (kwargs["input_ids"].dim() == 2):
+            for input_tensor in kwargs["input_ids"]:
+                tensor_length = input_tensor.shape[-1]
+                if tensor_length > self._config.max_out_tokens:
+                    raise RuntimeError(
+                        f"Input with size {tensor_length} exceeds maximum length of {self._config.max_out_tokens}. Please increase `max_tokens` in the DeepSpeed Inference Config."
+                    )
+
         return self.module.generate(*inputs, **kwargs)