From 924650f8cac72a33555a95d32c8863f6a768445e Mon Sep 17 00:00:00 2001
From: Joe Cummings
Date: Tue, 24 Sep 2024 23:41:45 -0400
Subject: [PATCH 1/2] Use `image_seq_len` to determine proper cache size in
 generate_v2

---
 recipes/dev/generate_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/recipes/dev/generate_v2.py b/recipes/dev/generate_v2.py
index feae291fb5..ec5d6a252e 100644
--- a/recipes/dev/generate_v2.py
+++ b/recipes/dev/generate_v2.py
@@ -133,7 +133,7 @@ def generate(self, cfg: DictConfig):
             batch_size=1,
             dtype=self._dtype,
             encoder_max_seq_len=(
-                model_inputs["encoder_mask"][0].size(1)
+                self.model_transform.image_seq_len
                 if is_multimodal_input
                 else None
             ),

From 738baf84e6e2546a73f237036228f6253a8bfb1b Mon Sep 17 00:00:00 2001
From: Joe Cummings
Date: Wed, 25 Sep 2024 00:10:12 -0400
Subject: [PATCH 2/2] Use `image_seq_len` to determine proper cache size in
 generate_v2 (#1673)

---
 recipes/dev/generate_v2.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/recipes/dev/generate_v2.py b/recipes/dev/generate_v2.py
index ec5d6a252e..e63ea2dcb0 100644
--- a/recipes/dev/generate_v2.py
+++ b/recipes/dev/generate_v2.py
@@ -133,9 +133,7 @@ def generate(self, cfg: DictConfig):
             batch_size=1,
             dtype=self._dtype,
             encoder_max_seq_len=(
-                self.model_transform.image_seq_len
-                if is_multimodal_input
-                else None
+                self.model_transform.image_seq_len if is_multimodal_input else None
             ),
             decoder_max_seq_len=total_response_length,
         )
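
Note on the change: the hunks above switch the source of `encoder_max_seq_len` from the current prompt's mask width, model_inputs["encoder_mask"][0].size(1), to the transform's fixed `image_seq_len`. The snippet below is a minimal, self-contained sketch of that selection logic; `DummyTransform` and `encoder_cache_len` are hypothetical stand-ins for illustration, not names from the recipe.

from dataclasses import dataclass
from typing import Optional


@dataclass
class DummyTransform:
    """Hypothetical stand-in for the recipe's model transform."""

    image_seq_len: int  # fixed number of encoder (image) token positions


def encoder_cache_len(
    transform: DummyTransform, is_multimodal_input: bool
) -> Optional[int]:
    """Mirror the patched expression: size the encoder KV cache from the
    transform's image_seq_len rather than from the current prompt's
    encoder mask."""
    return transform.image_seq_len if is_multimodal_input else None


# Example usage (1601 is an arbitrary illustrative value):
print(encoder_cache_len(DummyTransform(image_seq_len=1601), True))   # 1601
print(encoder_cache_len(DummyTransform(image_seq_len=1601), False))  # None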