diff --git a/examples/llm_vision.py b/examples/llm_vision.py
index eff5c4d52..1bfbfb230 100644
--- a/examples/llm_vision.py
+++ b/examples/llm_vision.py
@@ -16,7 +16,8 @@ async def main():
     invoice_path = Path(__file__).parent.joinpath("..", "tests", "data", "invoices", "invoice-2.png")
     encode_image(invoice_path)
     # res = await llm.aask(msg="return `True` if this image might be a invoice, or return `False`", images=[img_base64])
-    await llm.aask(msg="hello")
+    res = await llm.aask(msg="hello")
+    print(res)
     # assert ("true" in res.lower()) or ("invoice" in res.lower())
diff --git a/metagpt/provider/ollama_api.py b/metagpt/provider/ollama_api.py
index 6a2635b95..4537a8a2c 100644
--- a/metagpt/provider/ollama_api.py
+++ b/metagpt/provider/ollama_api.py
@@ -49,7 +49,7 @@ def _parse_input_msg(self, msg: dict) -> Tuple[Optional[str], Optional[str]]:
         if tpe == "text":
             return msg["text"], None
         elif tpe == "image_url":
-            return None, msg["image_url"]["url"][self._image_b64_rms :]
+            return None, msg["image_url"]["url"][self._image_b64_rms:]
         else:
             raise ValueError
     else:
@@ -317,7 +317,7 @@ def get_choice_text(self, rsp):
 
 @register_provider(LLMType.OLLAMA_EMBED)
-class OllamaEmbed(OllamaLLM):
+class OllamaEmbed(OllamaEmbeddings):
     @property
     def _llama_api_inuse(self) -> OllamaMessageAPI:
         return OllamaMessageAPI.EMBED
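
For context on the `_parse_input_msg` hunk: the slice `[self._image_b64_rms:]` presumably strips a data-URL prefix from `image_url.url` so only the raw base64 payload is passed on. The sketch below is a hypothetical, standalone illustration of that idea, not MetaGPT's actual implementation; `_IMAGE_B64_PREFIX`, the module-level `_image_b64_rms`, and the free function `parse_input_msg` are all assumptions introduced here for clarity.

```python
# Hypothetical sketch: split an OpenAI-style message part into (text, image_b64),
# stripping an assumed data-URL prefix from image parts.
from typing import Optional, Tuple

_IMAGE_B64_PREFIX = "data:image/png;base64,"  # assumed prefix; the real one may differ
_image_b64_rms = len(_IMAGE_B64_PREFIX)       # number of leading characters to remove

def parse_input_msg(msg: dict) -> Tuple[Optional[str], Optional[str]]:
    """Return (text, image_b64); exactly one side is populated per message part."""
    tpe = msg["type"]
    if tpe == "text":
        return msg["text"], None
    elif tpe == "image_url":
        # Drop the data-URL prefix, keeping only the base64 payload
        return None, msg["image_url"]["url"][_image_b64_rms:]
    raise ValueError(f"unsupported message part type: {tpe}")

# Usage example
text, img = parse_input_msg(
    {"type": "image_url", "image_url": {"url": _IMAGE_B64_PREFIX + "iVBORw0KG..."}}
)
print(text, img)  # -> None iVBORw0KG...
```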