diff --git a/deepspeed/inference/engine.py b/deepspeed/inference/engine.py index 5d54035a39fc..94c63e13329d 100755 --- a/deepspeed/inference/engine.py +++ b/deepspeed/inference/engine.py @@ -598,4 +598,12 @@ def _generate(self, *inputs, **kwargs): raise NotImplementedError("DeepSpeed does not support `num_beams` > 1, if this is important to you please " "add your request to: https://github.com/microsoft/DeepSpeed/issues/2506") + if ("input_ids" in kwargs) and (kwargs["input_ids"].dim() == 2): + for input_tensor in kwargs["input_ids"]: + tensor_length = input_tensor.shape[-1] + if tensor_length > self._config.max_out_tokens: + raise RuntimeError( + f"Input with size {tensor_length} exceeds maximum length of {self._config.max_out_tokens}. Please increase `max_tokens` in the DeepSpeed Inference Config." + ) + return self.module.generate(*inputs, **kwargs)