Added `max_matching_ngram_size` to `GenerationConfig` #29131

Merged (7 commits, Mar 6, 2024)
4 changes: 2 additions & 2 deletions src/transformers/generation/candidate_generator.py
```diff
@@ -252,10 +252,10 @@ class PromptLookupCandidateGenerator(CandidateGenerator):
     def __init__(
         self,
         num_output_tokens: int = 10,
-        max_matching_ngram_size: int = 2,
+        max_matching_ngram_size: int = None,
     ):
         self.num_output_tokens = num_output_tokens
-        self.max_matching_ngram_size = max_matching_ngram_size
+        self.max_matching_ngram_size = max_matching_ngram_size if max_matching_ngram_size else 2
 
         if self.max_matching_ngram_size <= 0 or self.num_output_tokens <= 0:
             raise ValueError("Invalid max_matching_ngram_size or num_output_tokens")
```
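A minimal sketch of the fallback this hunk introduces (import path taken from the file above; the assertions just restate the diff's logic):

```python
from transformers.generation.candidate_generator import PromptLookupCandidateGenerator

# Omitting max_matching_ngram_size (or passing None) falls back to the previous default of 2.
generator = PromptLookupCandidateGenerator(num_output_tokens=10)
assert generator.max_matching_ngram_size == 2

# An explicit value is kept as-is.
generator = PromptLookupCandidateGenerator(num_output_tokens=10, max_matching_ngram_size=3)
assert generator.max_matching_ngram_size == 3
```

Note that the truthiness check also maps an explicit `0` to 2 instead of reaching the `ValueError` below; `max_matching_ngram_size if max_matching_ngram_size is not None else 2` would preserve the validation for `0`.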
8 changes: 8 additions & 0 deletions src/transformers/generation/configuration_utils.py
```diff
@@ -257,11 +257,18 @@ class GenerationConfig(PushToHubMixin):
             - `"heuristic_transient"`: Same as `"heuristic"` but `num_assistant_tokens` is reset to its initial value after each generation call.
             - `"constant"`: `num_assistant_tokens` stays unchanged during generation
 
+        prompt_lookup_num_tokens (`int`, *optional*, defaults to `None`):
+            The number of tokens to be output as candidate tokens.
+
+        max_matching_ngram_size (`int`, *optional*, defaults to `None`):
+            The maximum ngram size to be considered for matching in the prompt. Defaults to 2 if not provided.
+
     > Parameters specific to the caching mechanism:
 
         cache_implementation (`str`, *optional*, defaults to `None`):
             Cache class that should be used when generating.
 
+
     > Wild card
 
         generation_kwargs:
```
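With both options documented above, prompt lookup decoding can be tuned straight from `generate()`. A usage sketch (the model name is only an example; any causal LM supported by `generate()` works):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # example model
model = AutoModelForCausalLM.from_pretrained("gpt2")
inputs = tokenizer("The quick brown fox jumps over the lazy dog. The quick", return_tensors="pt")

# prompt_lookup_num_tokens switches generation to prompt lookup decoding;
# max_matching_ngram_size caps the ngram length searched for in the prompt.
outputs = model.generate(
    **inputs,
    max_new_tokens=20,
    prompt_lookup_num_tokens=10,
    max_matching_ngram_size=3,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```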
```diff
@@ -338,6 +345,7 @@ def __init__(self, **kwargs):
 
         # Prompt lookup decoding
         self.prompt_lookup_num_tokens = kwargs.pop("prompt_lookup_num_tokens", None)
+        self.max_matching_ngram_size = kwargs.pop("max_matching_ngram_size", None)
 
         # Wild card
         self.generation_kwargs = kwargs.pop("generation_kwargs", {})
```
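The effect of the `__init__` change above, as a quick sketch:

```python
from transformers import GenerationConfig

# The new kwarg is popped alongside prompt_lookup_num_tokens and stored on the config.
config = GenerationConfig(prompt_lookup_num_tokens=10, max_matching_ngram_size=3)
print(config.max_matching_ngram_size)  # 3

# Left unset it stays None; the candidate generator applies its fallback of 2 later.
print(GenerationConfig().max_matching_ngram_size)  # None
```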
1 change: 1 addition & 0 deletions src/transformers/generation/utils.py
```diff
@@ -703,6 +703,7 @@ def _get_candidate_generator(
         if generation_config.prompt_lookup_num_tokens is not None:
             candidate_generator = PromptLookupCandidateGenerator(
                 num_output_tokens=generation_config.prompt_lookup_num_tokens,
+                max_matching_ngram_size=generation_config.max_matching_ngram_size,
             )
         else:
             candidate_generator = AssistedCandidateGenerator(
```
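Tying the three files together, a sketch of the default path when a user sets only `prompt_lookup_num_tokens` (this mirrors what `_get_candidate_generator` does above):

```python
from transformers import GenerationConfig
from transformers.generation.candidate_generator import PromptLookupCandidateGenerator

config = GenerationConfig(prompt_lookup_num_tokens=10)
assert config.max_matching_ngram_size is None  # user never set it

# The None is forwarded, and the generator falls back to 2, so behavior is
# unchanged for callers that never touch the new option.
candidate_generator = PromptLookupCandidateGenerator(
    num_output_tokens=config.prompt_lookup_num_tokens,
    max_matching_ngram_size=config.max_matching_ngram_size,
)
assert candidate_generator.max_matching_ngram_size == 2
```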