From b278719e484bd0014cd1bc5042f4acda2c7d03bb Mon Sep 17 00:00:00 2001
From: Michael Wyatt
Date: Fri, 15 Sep 2023 16:23:10 -0700
Subject: [PATCH] added check to avoid undefined behavior when the input_id length is greater than max_tokens

---
 deepspeed/inference/engine.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/deepspeed/inference/engine.py b/deepspeed/inference/engine.py
index 5d54035a39fc..94c63e13329d 100755
--- a/deepspeed/inference/engine.py
+++ b/deepspeed/inference/engine.py
@@ -598,4 +598,12 @@ def _generate(self, *inputs, **kwargs):
             raise NotImplementedError("DeepSpeed does not support `num_beams` > 1, if this is important to you please "
                                       "add your request to: https://github.com/microsoft/DeepSpeed/issues/2506")
 
+        if ("input_ids" in kwargs) and (kwargs["input_ids"].dim() == 2):
+            for input_tensor in kwargs["input_ids"]:
+                tensor_length = input_tensor.shape[-1]
+                if tensor_length > self._config.max_out_tokens:
+                    raise RuntimeError(
+                        f"Input with size {tensor_length} exceeds maximum length of {self._config.max_out_tokens}. Please increase `max_tokens` in the DeepSpeed Inference Config."
+                    )
+
         return self.module.generate(*inputs, **kwargs)