From 3f2dc476ee75e061628c8e644df803c245777daa Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 10 Mar 2025 13:55:38 +0100 Subject: [PATCH 1/2] fix: Fix bug in voyage implementation "passage" is not a valid input for the voyage API. Remapped to "document". --- mteb/models/voyage_models.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 1c628dda13..76504814e4 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -99,7 +99,12 @@ def encode( **kwargs: Any, ) -> np.ndarray: prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) - input_type = prompt_name if prompt_name is not None else "document" + prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) + prompt_name = ( + "document" if (prompt_name == "passage") else prompt_name + ) # remap to voyage format + input_type = prompt_name if (prompt_name is not None) else "document" + return self._batched_encode(sentences, batch_size, input_type) def _batched_encode( From d34cda9e47cb38a6f76b4ffc771141391e79d642 Mon Sep 17 00:00:00 2001 From: Kenneth Enevoldsen Date: Mon, 10 Mar 2025 15:59:53 +0100 Subject: [PATCH 2/2] Update mteb/models/voyage_models.py Co-authored-by: Roman Solomatin --- mteb/models/voyage_models.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/mteb/models/voyage_models.py b/mteb/models/voyage_models.py index 76504814e4..539f3b5c38 100644 --- a/mteb/models/voyage_models.py +++ b/mteb/models/voyage_models.py @@ -99,11 +99,7 @@ def encode( **kwargs: Any, ) -> np.ndarray: prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) - prompt_name = self.get_prompt_name(self.model_prompts, task_name, prompt_type) - prompt_name = ( - "document" if (prompt_name == "passage") else prompt_name - ) # remap to voyage format - input_type = prompt_name if (prompt_name is not None) else "document" + input_type = self.model_prompts.get(prompt_name, "document") return self._batched_encode(sentences, batch_size, input_type)