diff --git a/src/transformers/generation/configuration_utils.py b/src/transformers/generation/configuration_utils.py
index 17b8875a40195d..25abcc67e90e38 100644
--- a/src/transformers/generation/configuration_utils.py
+++ b/src/transformers/generation/configuration_utils.py
@@ -373,6 +373,8 @@ def validate(self, is_init=False):
         # Validation of individual attributes
         if self.early_stopping not in {True, False, "never"}:
             raise ValueError(f"`early_stopping` must be a boolean or 'never', but is {self.early_stopping}.")
+        if self.max_new_tokens is not None and self.max_new_tokens <= 0:
+            raise ValueError(f"`max_new_tokens` must be greater than 0, but is {self.max_new_tokens}.")
 
         # Validation of attribute relations:
         fix_location = ""
diff --git a/src/transformers/generation/utils.py b/src/transformers/generation/utils.py
index 622d6731776876..0b8102c353da87 100644
--- a/src/transformers/generation/utils.py
+++ b/src/transformers/generation/utils.py
@@ -1138,11 +1138,10 @@ def _validate_generated_length(self, generation_config, input_ids_length, has_default_max_length):
             )
         if input_ids_length >= generation_config.max_length:
             input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
-            warnings.warn(
+            raise ValueError(
                 f"Input length of {input_ids_string} is {input_ids_length}, but `max_length` is set to"
                 f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
-                " increasing `max_new_tokens`.",
-                UserWarning,
+                " increasing `max_length` or, better yet, setting `max_new_tokens`."
             )
 
         # 2. Min length warnings due to unfeasible parameter combinations
diff --git a/tests/pipelines/test_pipelines_text_generation.py b/tests/pipelines/test_pipelines_text_generation.py
index bf4c1e9f9d4de1..0500e3b0353c4a 100644
--- a/tests/pipelines/test_pipelines_text_generation.py
+++ b/tests/pipelines/test_pipelines_text_generation.py
@@ -93,17 +93,19 @@ def test_small_model_pt(self):
 
         ## -- test tokenizer_kwargs
         test_str = "testing tokenizer kwargs. using truncation must result in a different generation."
+        input_len = len(text_generator.tokenizer(test_str)["input_ids"])
         output_str, output_str_with_truncation = (
-            text_generator(test_str, do_sample=False, return_full_text=False)[0]["generated_text"],
+            text_generator(test_str, do_sample=False, return_full_text=False, min_new_tokens=1)[0]["generated_text"],
             text_generator(
                 test_str,
                 do_sample=False,
                 return_full_text=False,
+                min_new_tokens=1,
                 truncation=True,
-                max_length=3,
+                max_length=input_len + 1,
             )[0]["generated_text"],
         )
-        assert output_str != output_str_with_truncation  # results must be different because one hd truncation
+        assert output_str != output_str_with_truncation  # results must be different because one had truncation
 
         # -- what is the point of this test? padding is hardcoded False in the pipeline anyway
         text_generator.tokenizer.pad_token_id = text_generator.model.config.eos_token_id
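
For context, a minimal sketch of the user-facing behavior after this change. The tiny checkpoint and prompt below are illustrative only, not part of the PR:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Illustrative tiny checkpoint; any causal LM behaves the same way here.
tokenizer = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")
inputs = tokenizer("testing generation length validation", return_tensors="pt")

# A non-positive `max_new_tokens` is now rejected when the generation config
# is validated, instead of silently producing an empty generation.
try:
    model.generate(**inputs, max_new_tokens=0)
except ValueError as e:
    print(e)  # `max_new_tokens` must be greater than 0, but is 0.

# An input at least as long as `max_length` now raises a ValueError instead of
# emitting a UserWarning, since there is no room left for new tokens.
try:
    model.generate(**inputs, max_length=inputs["input_ids"].shape[1])
except ValueError as e:
    print(e)
```

The pipeline test is updated for the same reason: with the stricter check, `max_length=3` would truncate the prompt to exactly `max_length` tokens and then hit the new error, so the test now truncates to `input_len + 1` and forces at least one generated token via `min_new_tokens=1`.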