diff --git a/src/transformers/generation_beam_search.py b/src/transformers/generation_beam_search.py
index 1fea43e1d7e503..cebe754af23ee9 100644
--- a/src/transformers/generation_beam_search.py
+++ b/src/transformers/generation_beam_search.py
@@ -13,6 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import warnings
 from abc import ABC, abstractmethod
 from collections import UserDict
 from typing import Optional, Tuple
@@ -110,6 +111,7 @@ def finalize(
         next_scores: torch.FloatTensor,
         next_tokens: torch.LongTensor,
         next_indices: torch.LongTensor,
+        max_length: int,
         **kwargs
     ) -> torch.LongTensor:
         raise NotImplementedError("This is an abstract method.")
@@ -152,15 +154,14 @@ class BeamSearchScorer(BeamScorer):
     def __init__(
         self,
         batch_size: int,
-        max_length: int,
         num_beams: int,
         device: torch.device,
         length_penalty: Optional[float] = 1.0,
        do_early_stopping: Optional[bool] = False,
         num_beam_hyps_to_keep: Optional[int] = 1,
         num_beam_groups: Optional[int] = 1,
+        **kwargs,
     ):
-        self.max_length = max_length
         self.num_beams = num_beams
         self.device = device
         self.length_penalty = length_penalty
@@ -173,7 +174,6 @@ def __init__(
         self._beam_hyps = [
             BeamHypotheses(
                 num_beams=self.num_beams,
-                max_length=self.max_length,
                 length_penalty=self.length_penalty,
                 early_stopping=self.do_early_stopping,
             )
@@ -192,6 +192,13 @@ def __init__(
                 f"has to be divisible by `num_beam_groups`, but is {num_beam_groups} with `num_beams` being {num_beams}."
             )
 
+        if "max_length" in kwargs:
+            warnings.warn(
+                "Passing `max_length` to BeamSearchScorer is deprecated and has no effect. "
+                "`max_length` should be passed directly to `beam_search(...)`, `beam_sample(...)`, "
+                "or `group_beam_search(...)`."
+            )
+
     @property
     def is_done(self) -> bool:
         return self._done.all()
@@ -279,6 +286,7 @@ def finalize(
         final_beam_scores: torch.FloatTensor,
         final_beam_tokens: torch.LongTensor,
         final_beam_indices: torch.LongTensor,
+        max_length: int,
         pad_token_id: Optional[int] = None,
         eos_token_id: Optional[int] = None,
     ) -> Tuple[torch.LongTensor]:
@@ -316,7 +324,7 @@ def finalize(
                 best_scores[i * self.num_beam_hyps_to_keep + j] = best_score
 
         # prepare for adding eos
-        sent_max_len = min(sent_lengths.max().item() + 1, self.max_length)
+        sent_max_len = min(sent_lengths.max().item() + 1, max_length)
         decoded: torch.LongTensor = input_ids.new(batch_size * self.num_beam_hyps_to_keep, sent_max_len)
         # shorter batches are padded if needed
         if sent_lengths.min().item() != sent_lengths.max().item():
@@ -326,7 +334,7 @@ def finalize(
         # fill with hypotheses and eos_token_id if the latter fits in
         for i, hypo in enumerate(best):
             decoded[i, : sent_lengths[i]] = hypo
-            if sent_lengths[i] < self.max_length:
+            if sent_lengths[i] < max_length:
                 decoded[i, sent_lengths[i]] = eos_token_id
         return UserDict(
             {
@@ -337,11 +345,10 @@ def finalize(
 
 
 class BeamHypotheses:
-    def __init__(self, num_beams: int, max_length: int, length_penalty: float, early_stopping: bool):
+    def __init__(self, num_beams: int, length_penalty: float, early_stopping: bool):
         """
         Initialize n-best list of hypotheses.
""" - self.max_length = max_length - 1 # ignoring bos_token self.length_penalty = length_penalty self.early_stopping = early_stopping self.num_beams = num_beams diff --git a/src/transformers/generation_utils.py b/src/transformers/generation_utils.py index 959c63c728473c..097e552c5ba601 100644 --- a/src/transformers/generation_utils.py +++ b/src/transformers/generation_utils.py @@ -1027,7 +1027,6 @@ def generate( beam_scorer = BeamSearchScorer( batch_size=batch_size, - max_length=stopping_criteria.max_length, num_beams=num_beams, device=self.device, length_penalty=length_penalty, @@ -1063,7 +1062,6 @@ def generate( raise ValueError("`max_length` needs to be a stopping_criteria for now.") beam_scorer = BeamSearchScorer( batch_size=batch_size, - max_length=stopping_criteria.max_length, num_beams=num_beams, device=self.device, length_penalty=length_penalty, @@ -1700,7 +1698,6 @@ def beam_search( >>> # instantiate beam scorer >>> beam_scorer = BeamSearchScorer( ... batch_size=1, - ... max_length=model.config.max_length, ... num_beams=num_beams, ... device=model.device, ... ) @@ -1756,7 +1753,7 @@ def beam_search( assert ( num_beams * batch_size == batch_beam_size - ), "Batch dimension of `input_ids` should be {num_beams * batch_size}, but is {batch_beam_size}." + ), f"Batch dimension of `input_ids` should be {num_beams * batch_size}, but is {batch_beam_size}." beam_scores = torch.zeros((batch_size, num_beams), dtype=torch.float, device=input_ids.device) beam_scores[:, 1:] = -1e9 @@ -1792,10 +1789,7 @@ def beam_search( # hack: adjust tokens for Marian. For Marian we have to make sure that the `pad_token_id` # cannot be generated both before and after the `F.log_softmax` operation. - next_token_logits = self.adjust_logits_during_generation( - next_token_logits, cur_len=cur_len, max_length=None - ) - + next_token_logits = self.adjust_logits_during_generation(next_token_logits, cur_len=cur_len) next_token_scores = F.log_softmax(next_token_logits, dim=-1) # (batch_size * num_beams, vocab_size) next_token_scores = logits_processor(input_ids, next_token_scores) @@ -1861,7 +1855,13 @@ def beam_search( this_peer_finished = True sequence_outputs = beam_scorer.finalize( - input_ids, beam_scores, next_tokens, next_indices, pad_token_id=pad_token_id, eos_token_id=eos_token_id + input_ids, + beam_scores, + next_tokens, + next_indices, + pad_token_id=pad_token_id, + eos_token_id=eos_token_id, + max_length=stopping_criteria.max_length, ) if return_dict_in_generate: @@ -2086,10 +2086,7 @@ def beam_sample( # hack: adjust tokens for Marian. For Marian we have to make sure that the `pad_token_id` # cannot be generated both before and after the `F.log_softmax` operation. 
-            next_token_logits = self.adjust_logits_during_generation(
-                next_token_logits, cur_len=cur_len, max_length=None
-            )
-
+            next_token_logits = self.adjust_logits_during_generation(next_token_logits, cur_len=cur_len)
             next_token_scores = F.log_softmax(next_token_logits, dim=-1)  # (batch_size * num_beams, vocab_size)
 
             next_token_scores = logits_processor(input_ids, next_token_scores)
@@ -2160,7 +2157,13 @@
                     this_peer_finished = True
 
         sequence_outputs = beam_scorer.finalize(
-            input_ids, beam_scores, next_tokens, next_indices, pad_token_id=pad_token_id, eos_token_id=eos_token_id
+            input_ids,
+            beam_scores,
+            next_tokens,
+            next_indices,
+            pad_token_id=pad_token_id,
+            eos_token_id=eos_token_id,
+            max_length=stopping_criteria.max_length,
         )
 
         if return_dict_in_generate:
@@ -2411,10 +2414,7 @@
 
                 # hack: adjust tokens for Marian. For Marian we have to make sure that the `pad_token_id`
                 # cannot be generated both before and after the `F.log_softmax` operation.
-                next_token_logits = self.adjust_logits_during_generation(
-                    next_token_logits, cur_len=cur_len, max_length=None
-                )
-
+                next_token_logits = self.adjust_logits_during_generation(next_token_logits, cur_len=cur_len)
                 next_token_scores = F.log_softmax(next_token_logits, dim=-1)  # (batch_size * group_size, vocab_size)
                 vocab_size = next_token_scores.shape[-1]
 
@@ -2497,7 +2497,13 @@
                     this_peer_finished = True
 
         sequence_outputs = beam_scorer.finalize(
-            input_ids, beam_scores, next_tokens, next_indices, pad_token_id=pad_token_id, eos_token_id=eos_token_id
+            input_ids,
+            beam_scores,
+            next_tokens,
+            next_indices,
+            pad_token_id=pad_token_id,
+            eos_token_id=eos_token_id,
+            max_length=stopping_criteria.max_length,
         )
 
         if return_dict_in_generate:
diff --git a/src/transformers/models/marian/modeling_marian.py b/src/transformers/models/marian/modeling_marian.py
index b393697e621851..1431a0e11c2a7f 100755
--- a/src/transformers/models/marian/modeling_marian.py
+++ b/src/transformers/models/marian/modeling_marian.py
@@ -1335,7 +1335,7 @@ def prepare_inputs_for_generation(
     def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor):
         return shift_tokens_right(labels, self.config.pad_token_id, self.config.decoder_start_token_id)
 
-    def adjust_logits_during_generation(self, logits, cur_len, max_length):
+    def adjust_logits_during_generation(self, logits, cur_len):
         logits[:, self.config.pad_token_id] = float("-inf")  # never predict pad token.
         return logits
 
diff --git a/src/transformers/models/rag/modeling_rag.py b/src/transformers/models/rag/modeling_rag.py
index 7975361749f7d7..42c2e16d6ca795 100644
--- a/src/transformers/models/rag/modeling_rag.py
+++ b/src/transformers/models/rag/modeling_rag.py
@@ -1543,7 +1542,6 @@ def extend_enc_output(tensor, num_beams=None):
             raise ValueError("`num_return_sequences` has to be smaller or equal to `num_beams`.")
         beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=num_beams,
             device=self.device,
             length_penalty=length_penalty,
diff --git a/tests/test_generation_beam_search.py b/tests/test_generation_beam_search.py
index aa8270c31f2c0a..fdbe35eafaa449 100644
--- a/tests/test_generation_beam_search.py
+++ b/tests/test_generation_beam_search.py
@@ -59,7 +59,6 @@ def __init__(
     def prepare_beam_scorer(self, **kwargs):
         return BeamSearchScorer(
             batch_size=kwargs.get("batch_size", self.batch_size),
-            max_length=kwargs.get("max_length", self.max_length),
             num_beams=kwargs.get("num_beams", self.num_beams),
             device=torch_device,
             length_penalty=kwargs.get("length_penalty", self.length_penalty),
@@ -170,9 +169,7 @@ def cut_expected_tensor(tensor):
     def check_beam_scores_finalize(self, input_ids, next_tokens, next_indices, next_scores):
         # max_length should be only one more than current input_ids to check that eos is correctly appended
         max_length = self.sequence_length + 1
-        beam_scorer = self.prepare_beam_scorer(
-            num_beam_hyps_to_keep=1, max_length=max_length, length_penalty=1.0, do_early_stopping=False
-        )
+        beam_scorer = self.prepare_beam_scorer(num_beam_hyps_to_keep=1, length_penalty=1.0, do_early_stopping=False)
 
         # update beams and append to input_ids
         tokens = next_tokens.clone()
@@ -197,6 +194,7 @@ def check_beam_scores_finalize(self, input_ids, next_tokens, next_indices, next_
             output_indices,
             pad_token_id=self.pad_token_id,
             eos_token_id=self.eos_token_id,
+            max_length=max_length,
         )
 
         sequences = sequence_output["sequences"]
@@ -225,6 +223,7 @@ def check_beam_scores_finalize(self, input_ids, next_tokens, next_indices, next_
             output_indices,
             pad_token_id=self.pad_token_id,
             eos_token_id=self.eos_token_id,
+            max_length=max_length,
         )
         sequences = sequence_output["sequences"]
         sequence_scores = sequence_output["sequence_scores"]
diff --git a/tests/test_generation_utils.py b/tests/test_generation_utils.py
index 42c44b8c54e83d..4a7140d2ca3e50 100644
--- a/tests/test_generation_utils.py
+++ b/tests/test_generation_utils.py
@@ -148,7 +148,6 @@ def _get_beam_scorer_and_kwargs(batch_size, max_length, num_return_sequences=1):
         }
         beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=beam_kwargs["num_beams"],
             device=torch_device,
             length_penalty=beam_kwargs["length_penalty"],
@@ -169,7 +168,6 @@ def _get_diverse_beam_scorer_and_kwargs(batch_size, max_length, num_return_seque
         }
         beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=beam_kwargs["num_beams"],
             device=torch_device,
             length_penalty=beam_kwargs["length_penalty"],
@@ -1411,7 +1409,6 @@ def test_max_length_backward_compat_beam_search(self):
 
         beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=num_beams,
             device=torch_device,
         )
@@ -1442,7 +1439,6 @@ def test_max_length_backward_compat_group_beam_search(self):
 
         diverse_beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=num_beams,
             device=torch_device,
             num_beam_hyps_to_keep=num_return_sequences,
@@ -1502,7 +1498,6 @@ def test_max_length_warning_if_different(self):
         # Beam
         beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=num_beams,
             device=torch_device,
         )
@@ -1520,7 +1515,6 @@ def test_max_length_warning_if_different(self):
         # Grouped beam search
         diverse_beam_scorer = BeamSearchScorer(
             batch_size=batch_size,
-            max_length=max_length,
             num_beams=num_beams,
             device=torch_device,
             num_beam_hyps_to_keep=num_return_sequences,
@@ -1535,3 +1529,51 @@ def test_max_length_warning_if_different(self):
                 max_length=max_length,
                 **model_kwargs,
             )
+
+    def test_beam_search_warning_if_max_length_is_passed(self):
+        article = """Justin Timberlake and Jessica Biel, welcome to parenthood."""
+        bart_tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
+        bart_model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random").to(torch_device)
+
+        batch_size = 1
+        num_beams = 3
+
+        input_ids = bart_tokenizer(article, return_tensors="pt").input_ids.to(torch_device)
+        input_ids = input_ids.expand(num_beams, -1)
+        model_kwargs = bart_model._prepare_encoder_decoder_kwargs_for_generation(input_ids, {})
+
+        stopping_criteria_max_length = 18
+        stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=stopping_criteria_max_length)])
+
+        with self.assertWarns(UserWarning):
+            beam_scorer = BeamSearchScorer(
+                batch_size=batch_size,
+                num_beams=num_beams,
+                device=torch_device,
+                max_length=10,
+            )
+
+        generated_ids = bart_model.beam_search(
+            input_ids,
+            num_beams=num_beams,
+            stopping_criteria=stopping_criteria,
+            beam_scorer=beam_scorer,
+            **model_kwargs,
+        )
+
+        beam_scorer_no_max_len = BeamSearchScorer(
+            batch_size=batch_size,
+            num_beams=num_beams,
+            device=torch_device,
+        )
+
+        generated_ids_no_max_len = bart_model.beam_search(
+            input_ids,
+            num_beams=num_beams,
+            stopping_criteria=stopping_criteria,
+            beam_scorer=beam_scorer_no_max_len,
+            **model_kwargs,
+        )
+
+        # BeamSearchScorer max_length should not influence "real" max_length
+        self.assertEqual(generated_ids.tolist(), generated_ids_no_max_len.tolist())
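
Usage note (reviewer aid, not part of the patch): after this change `max_length` is owned by the stopping criteria and forwarded to `BeamSearchScorer.finalize` by `beam_search`, instead of being fixed at scorer construction. Below is a minimal sketch of the migrated call pattern, adapted from the new test above; the tiny checkpoint and the length value 18 are illustrative only.

    from transformers import BartForConditionalGeneration, BartTokenizer
    from transformers.generation_beam_search import BeamSearchScorer
    from transformers.generation_stopping_criteria import MaxLengthCriteria, StoppingCriteriaList

    tokenizer = BartTokenizer.from_pretrained("sshleifer/bart-tiny-random")
    model = BartForConditionalGeneration.from_pretrained("sshleifer/bart-tiny-random")

    num_beams = 3
    # beam_search is a low-level API: the prompt is pre-expanded to one row per beam
    # and the encoder outputs are prepared by hand, exactly as in the new test.
    input_ids = tokenizer("Justin Timberlake and Jessica Biel, welcome to parenthood.", return_tensors="pt").input_ids
    input_ids = input_ids.expand(num_beams, -1)
    model_kwargs = model._prepare_encoder_decoder_kwargs_for_generation(input_ids, {})

    # max_length now lives in the stopping criteria; BeamSearchScorer no longer takes it.
    beam_scorer = BeamSearchScorer(batch_size=1, num_beams=num_beams, device=model.device)
    stopping_criteria = StoppingCriteriaList([MaxLengthCriteria(max_length=18)])

    sequences = model.beam_search(input_ids, beam_scorer, stopping_criteria=stopping_criteria, **model_kwargs)
    print(tokenizer.batch_decode(sequences, skip_special_tokens=True))

The same pattern applies to `beam_sample(...)` and `group_beam_search(...)`; passing `max_length` to the scorer now only triggers the deprecation warning and has no effect on generation, which is what the new test asserts.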