From fdb4c284dedf375f1817be26922540aefc3fc84b Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Wed, 27 Nov 2024 18:44:05 +0700
Subject: [PATCH 01/17] add support for multimodal

---
 .../python/agent/tools/query_engine.py       | 22 ++++++++++++++-----
 .../components/settings/python/settings.py   | 17 +++++++++++++-
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index e78ae0442..ad0a47224 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,10 +1,13 @@
 import os
 from typing import Optional
 
+from llama_index.core.query_engine import BaseQueryEngine, SimpleMultiModalQueryEngine
 from llama_index.core.tools.query_engine import QueryEngineTool
 
+from app.settings import get_multi_modal_llm
 
-def create_query_engine(index, **kwargs):
+
+def create_query_engine(index, **kwargs) -> BaseQueryEngine:
     """
     Create a query engine for the given index.
 
@@ -17,11 +20,18 @@
         kwargs["similarity_top_k"] = top_k
     # If the index is a LlamaCloudIndex,
     # use auto_routed mode for better query results
-    if (
-        index.__class__.__name__ == "LlamaCloudIndex"
-        and kwargs.get("auto_routed") is None
-    ):
-        kwargs["auto_routed"] = True
+    if index.__class__.__name__ == "LlamaCloudIndex":
+        if kwargs.get("auto_routed") is None:
+            kwargs["auto_routed"] = True
+        kwargs["retrieve_image_nodes"] = True
+    # TODO: Add support for MultiModalVectorStoreIndex
+    mm_llm = get_multi_modal_llm()
+    if mm_llm:
+        return SimpleMultiModalQueryEngine(
+            retriever=index.as_retriever(**kwargs),
+            multi_modal_llm=mm_llm,
+        )
+
     return index.as_query_engine(**kwargs)
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index bc7270bd8..0068116f0 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -1,9 +1,24 @@
 import os
-from typing import Dict
+from typing import Dict, Optional
 
+from llama_index.core.multi_modal_llms import MultiModalLLM
 from llama_index.core.settings import Settings
 
 
+def get_multi_modal_llm() -> Optional[MultiModalLLM]:
+    model_provider = os.getenv("MODEL_PROVIDER")
+    llm_model = os.getenv("MULTIMODAL_LLM_MODEL")
+    if llm_model is None:
+        return None
+    if model_provider == "openai":
+        from llama_index.multi_modal_llms.openai import OpenAIMultiModal
+
+        return OpenAIMultiModal(model=llm_model)
+    else:
+        # TODO: Add support for other providers
+        return None
+
+
 def init_settings():
     model_provider = os.getenv("MODEL_PROVIDER")
     match model_provider:

From 1e647c22186803faff1101903dfd675f6c7c254c Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 08:52:43 +0700
Subject: [PATCH 02/17] update code

---
 .../python/agent/tools/query_engine.py       | 129 ++++++++++++++++--
 .../components/settings/python/settings.py   |  24 ++--
 2 files changed, 128 insertions(+), 25 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index ad0a47224..5fe0c5a06 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,10 +1,117 @@
 import os
-from typing import Optional
+from typing import List, Optional, Sequence
 
-from llama_index.core.query_engine import BaseQueryEngine, SimpleMultiModalQueryEngine
+from llama_index.core.base.base_query_engine import BaseQueryEngine
+from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
+from llama_index.core.query_engine import SimpleMultiModalQueryEngine
+from llama_index.core.query_engine.multi_modal import _get_image_and_text_nodes
+from llama_index.core.response_synthesizers import (
+    BaseSynthesizer,
+    get_response_synthesizer,
+)
+from llama_index.core.response_synthesizers.type import ResponseMode
+from llama_index.core.schema import ImageNode, NodeWithScore, QueryBundle
 from llama_index.core.tools.query_engine import QueryEngineTool
 
-from app.settings import get_multi_modal_llm
+from app.settings import multi_modal_llm
+
+
+class MultiModalQueryEngine(SimpleMultiModalQueryEngine):
+    """
+    A multi-modal query engine that splits the retrieval results into chunks and then summarizes each chunk to reduce the number of tokens in the response.
+    """
+
+    def __init__(
+        self,
+        text_synthesizer: Optional[BaseSynthesizer] = None,
+        *args,
+        **kwargs,
+    ):
+        super().__init__(*args, **kwargs)
+        # Use a response synthesizer for text nodes summarization
+        self._text_synthesizer = text_synthesizer or get_response_synthesizer(
+            streaming=False,
+            response_mode=ResponseMode.TREE_SUMMARIZE,
+        )
+
+    def _summarize_text_nodes(
+        self, query_bundle: QueryBundle, nodes: List[NodeWithScore]
+    ) -> str:
+        """
+        Synthesize a response for the query using the retrieved nodes.
+        """
+        return str(
+            self._text_synthesizer.synthesize(
+                query=query_bundle,
+                nodes=nodes,
+                streaming=False,
+            )
+        )
+
+    def synthesize(
+        self,
+        query_bundle: QueryBundle,
+        nodes: List[NodeWithScore],
+    ) -> RESPONSE_TYPE:
+        image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
+        # Summarize the text nodes
+        text_response = self._summarize_text_nodes(
+            query_bundle=query_bundle,
+            nodes=text_nodes,
+        )
+
+        fmt_prompt = self._text_qa_template.format(
+            context_str=text_response,
+            query_str=query_bundle.query_str,
+        )
+
+        llm_response = self._multi_modal_llm.complete(
+            prompt=fmt_prompt,
+            image_documents=[
+                image_node.node
+                for image_node in image_nodes
+                if isinstance(image_node.node, ImageNode)
+            ],
+        )
+
+        return Response(
+            response=str(llm_response),
+            source_nodes=nodes,
+            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
+        )
+
+    async def asynthesize(
+        self,
+        query_bundle: QueryBundle,
+        nodes: List[NodeWithScore],
+        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
+    ) -> RESPONSE_TYPE:
+        image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
+        # Summarize the text nodes to avoid exceeding the token limit
+        text_response = self._summarize_text_nodes(
+            query_bundle=query_bundle,
+            nodes=text_nodes,
+        )
+
+        fmt_prompt = self._text_qa_template.format(
+            context_str=text_response,
+            query_str=query_bundle.query_str,
+        )
+
+        llm_response = await self._multi_modal_llm.acomplete(
+            prompt=fmt_prompt,
+            image_documents=[
+                image_node.node
+                for image_node in image_nodes
+                if isinstance(image_node.node, ImageNode)
+            ],
+        )
+
+        return Response(
+            response=str(llm_response),
+            source_nodes=nodes,
+            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
+        )
 
 
 def create_query_engine(index, **kwargs) -> BaseQueryEngine:
@@ -21,15 +128,15 @@
     # If the index is a LlamaCloudIndex,
     # use auto_routed mode for better query results
     if index.__class__.__name__ == "LlamaCloudIndex":
-        if kwargs.get("auto_routed") is None:
-            kwargs["auto_routed"] = True
-        kwargs["retrieve_image_nodes"] = True
-    # TODO: Add support for MultiModalVectorStoreIndex
-    mm_llm = get_multi_modal_llm()
-    if mm_llm:
-        return SimpleMultiModalQueryEngine(
+        retrieval_mode = kwargs.get("retrieval_mode")
+        if retrieval_mode is None:
+            kwargs["retrieval_mode"] = "auto_routed"
+    if multi_modal_llm:
+        # Note: image nodes are not supported for auto_routed or chunk retrieval mode
+        kwargs["retrieve_image_nodes"] = True
+        return MultiModalQueryEngine(
             retriever=index.as_retriever(**kwargs),
-            multi_modal_llm=mm_llm,
+            multi_modal_llm=multi_modal_llm,
         )
 
     return index.as_query_engine(**kwargs)
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index 0068116f0..ae24ee4ad 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -4,19 +4,8 @@
 from llama_index.core.multi_modal_llms import MultiModalLLM
 from llama_index.core.settings import Settings
 
-
-def get_multi_modal_llm() -> Optional[MultiModalLLM]:
-    model_provider = os.getenv("MODEL_PROVIDER")
-    llm_model = os.getenv("MULTIMODAL_LLM_MODEL")
-    if llm_model is None:
-        return None
-    if model_provider == "openai":
-        from llama_index.multi_modal_llms.openai import OpenAIMultiModal
-
-        return OpenAIMultiModal(model=llm_model)
-    else:
-        # TODO: Add support for other providers
-        return None
+# Singleton for multi-modal LLM
+multi_modal_llm: Optional[MultiModalLLM] = None
 
 
 def init_settings():
     model_provider = os.getenv("MODEL_PROVIDER")
     match model_provider:
@@ -75,14 +64,21 @@ def init_openai():
     from llama_index.core.constants import DEFAULT_TEMPERATURE
     from llama_index.embeddings.openai import OpenAIEmbedding
     from llama_index.llms.openai import OpenAI
+    from llama_index.multi_modal_llms.openai import OpenAIMultiModal
+    from llama_index.multi_modal_llms.openai.utils import GPT4V_MODELS
 
     max_tokens = os.getenv("LLM_MAX_TOKENS")
+    model_name = os.getenv("MODEL", "gpt-4o-mini")
     Settings.llm = OpenAI(
-        model=os.getenv("MODEL", "gpt-4o-mini"),
+        model=model_name,
         temperature=float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
         max_tokens=int(max_tokens) if max_tokens is not None else None,
     )
 
+    if model_name in GPT4V_MODELS:
+        global multi_modal_llm
+        multi_modal_llm = OpenAIMultiModal(model=model_name)
+
     dimensions = os.getenv("EMBEDDING_DIM")
     Settings.embed_model = OpenAIEmbedding(
         model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-small"),

From bc8df660a11c5d153e50d04141615232d720dfb9 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 09:15:11 +0700
Subject: [PATCH 03/17] fix wrong signature override

---
 templates/components/engines/python/agent/tools/query_engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 5fe0c5a06..123eb679f 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -52,6 +52,7 @@
     def synthesize(
         self,
         query_bundle: QueryBundle,
         nodes: List[NodeWithScore],
+        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
     ) -> RESPONSE_TYPE:
         image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
         # Summarize the text nodes

From 9d6f94b97226585a04412444453e5cedc42fc529 Mon Sep 17 00:00:00 2001
From: thucpn
Date: Fri, 29 Nov 2024 11:05:46 +0700
Subject: [PATCH 04/17] bump: chat-ui

---
 .../types/streaming/nextjs/app/components/chat-section.tsx | 3 +--
 templates/types/streaming/nextjs/package.json              | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/templates/types/streaming/nextjs/app/components/chat-section.tsx b/templates/types/streaming/nextjs/app/components/chat-section.tsx
index 4a0fe9deb..edf7f3311 100644
--- a/templates/types/streaming/nextjs/app/components/chat-section.tsx
+++ b/templates/types/streaming/nextjs/app/components/chat-section.tsx
@@ -1,8 +1,7 @@
 "use client";
 
 import { ChatSection as ChatSectionUI } from "@llamaindex/chat-ui";
-import "@llamaindex/chat-ui/styles/code.css";
-import "@llamaindex/chat-ui/styles/katex.css";
+import "@llamaindex/chat-ui/styles/markdown.css";
 import "@llamaindex/chat-ui/styles/pdf.css";
 import { useChat } from "ai/react";
 import CustomChatInput from "./ui/chat/chat-input";
diff --git a/templates/types/streaming/nextjs/package.json b/templates/types/streaming/nextjs/package.json
index ff185c709..94e576f07 100644
--- a/templates/types/streaming/nextjs/package.json
+++ b/templates/types/streaming/nextjs/package.json
@@ -16,7 +16,7 @@
     "@radix-ui/react-select": "^2.1.1",
     "@radix-ui/react-slot": "^1.0.2",
     "@radix-ui/react-tabs": "^1.1.0",
-    "@llamaindex/chat-ui": "0.0.11",
+    "@llamaindex/chat-ui": "0.0.12",
     "ai": "4.0.3",
     "ajv": "^8.12.0",
     "class-variance-authority": "^0.7.0",

From a40da22788337cadb14487a0c531892c608d9347 Mon Sep 17 00:00:00 2001
From: Thuc Pham <51660321+thucpn@users.noreply.github.com>
Date: Fri, 29 Nov 2024 11:06:16 +0700
Subject: [PATCH 05/17] Create lazy-insects-share.md

---
 .changeset/lazy-insects-share.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/lazy-insects-share.md

diff --git a/.changeset/lazy-insects-share.md b/.changeset/lazy-insects-share.md
new file mode 100644
index 000000000..4f914968e
--- /dev/null
+++ b/.changeset/lazy-insects-share.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+bump: chat-ui

From 88d291801fe84284df80dbab481990ffc5e01d70 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 11:52:28 +0700
Subject: [PATCH 06/17] add MultiModalSynthesizer

---
 .../python/agent/tools/query_engine.py       | 104 ++++++------------
 .../components/settings/python/settings.py   |   9 +-
 2 files changed, 37 insertions(+), 76 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 123eb679f..905d20df9 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,102 +1,59 @@
 import os
-from typing import List, Optional, Sequence
+from typing import Any, List, Optional, Sequence
 
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
-from llama_index.core.query_engine import SimpleMultiModalQueryEngine
+from llama_index.core.multi_modal_llms import MultiModalLLM
+from llama_index.core.prompts.base import BasePromptTemplate
+from llama_index.core.prompts.default_prompt_selectors import (
+    DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
+)
+from llama_index.core.query_engine import (
+    RetrieverQueryEngine,
+)
 from llama_index.core.query_engine.multi_modal import _get_image_and_text_nodes
-from llama_index.core.response_synthesizers import (
-    BaseSynthesizer,
-    get_response_synthesizer,
+from llama_index.core.response_synthesizers import TreeSummarize
+from llama_index.core.response_synthesizers.base import QueryTextType
+from llama_index.core.schema import (
+    ImageNode,
+    NodeWithScore,
 )
-from llama_index.core.response_synthesizers.type import ResponseMode
-from llama_index.core.schema import ImageNode, NodeWithScore, QueryBundle
 from llama_index.core.tools.query_engine import QueryEngineTool
 
 from app.settings import multi_modal_llm
 
 
-class MultiModalQueryEngine(SimpleMultiModalQueryEngine):
+class MultiModalSynthesizer(TreeSummarize):
     """
-    A multi-modal query engine that splits the retrieval results into chunks and then summarizes each chunk to reduce the number of tokens in the response.
+    A synthesizer that summarizes text nodes and uses a multi-modal LLM to generate a response.
     """
 
     def __init__(
         self,
-        text_synthesizer: Optional[BaseSynthesizer] = None,
+        multimodal_model: Optional[MultiModalLLM] = None,
+        text_qa_template: Optional[BasePromptTemplate] = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
-        # Use a response synthesizer for text nodes summarization
-        self._text_synthesizer = text_synthesizer or get_response_synthesizer(
-            streaming=False,
-            response_mode=ResponseMode.TREE_SUMMARIZE,
-        )
-
-    def _summarize_text_nodes(
-        self, query_bundle: QueryBundle, nodes: List[NodeWithScore]
-    ) -> str:
-        """
-        Synthesize a response for the query using the retrieved nodes.
-        """
-        return str(
-            self._text_synthesizer.synthesize(
-                query=query_bundle,
-                nodes=nodes,
-                streaming=False,
-            )
-        )
-
-    def synthesize(
-        self,
-        query_bundle: QueryBundle,
-        nodes: List[NodeWithScore],
-        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
-    ) -> RESPONSE_TYPE:
-        image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
-        # Summarize the text nodes
-        text_response = self._summarize_text_nodes(
-            query_bundle=query_bundle,
-            nodes=text_nodes,
-        )
-
-        fmt_prompt = self._text_qa_template.format(
-            context_str=text_response,
-            query_str=query_bundle.query_str,
-        )
-
-        llm_response = self._multi_modal_llm.complete(
-            prompt=fmt_prompt,
-            image_documents=[
-                image_node.node
-                for image_node in image_nodes
-                if isinstance(image_node.node, ImageNode)
-            ],
-        )
-
-        return Response(
-            response=str(llm_response),
-            source_nodes=nodes,
-            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
-        )
+        self._multi_modal_llm = multimodal_model
+        self._text_qa_template = text_qa_template or DEFAULT_TREE_SUMMARIZE_PROMPT_SEL
 
     async def asynthesize(
         self,
-        query_bundle: QueryBundle,
+        query: QueryTextType,
         nodes: List[NodeWithScore],
         additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
+        **response_kwargs: Any,
     ) -> RESPONSE_TYPE:
         image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
+
         # Summarize the text nodes to avoid exceeding the token limit
-        text_response = self._summarize_text_nodes(
-            query_bundle=query_bundle,
-            nodes=text_nodes,
-        )
+        text_response = str(await super().asynthesize(query, nodes))
 
         fmt_prompt = self._text_qa_template.format(
             context_str=text_response,
-            query_str=query_bundle.query_str,
+            query_str=query.query_str,  # type: ignore
         )
 
         llm_response = await self._multi_modal_llm.acomplete(
             prompt=fmt_prompt,
             image_documents=[
                 image_node.node
                 for image_node in image_nodes
                 if isinstance(image_node.node, ImageNode)
             ],
         )
 
         return Response(
             response=str(llm_response),
             source_nodes=nodes,
             metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
         )
 
 
 def create_query_engine(index, **kwargs) -> BaseQueryEngine:
     """
     Create a query engine for the given index.
 
     Args:
         index: The index to create a query engine for.
         params (optional): Additional parameters for the query engine, e.g.: similarity_top_k
     """
+
     top_k = int(os.getenv("TOP_K", 0))
     if top_k != 0 and kwargs.get("filters") is None:
         kwargs["similarity_top_k"] = top_k
     # If the index is a LlamaCloudIndex,
     # use auto_routed mode for better query results
     if index.__class__.__name__ == "LlamaCloudIndex":
         retrieval_mode = kwargs.get("retrieval_mode")
         if retrieval_mode is None:
             kwargs["retrieval_mode"] = "auto_routed"
-    if multi_modal_llm:
-        # Note: image nodes are not supported for auto_routed or chunk retrieval mode
+    mm_model = multi_modal_llm.get()
+    if mm_model:
         kwargs["retrieve_image_nodes"] = True
-        return MultiModalQueryEngine(
+        return RetrieverQueryEngine(
             retriever=index.as_retriever(**kwargs),
-            multi_modal_llm=multi_modal_llm,
+            response_synthesizer=MultiModalSynthesizer(
+                multimodal_model=mm_model
+            ),
         )
 
     return index.as_query_engine(**kwargs)
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index ae24ee4ad..abbc696ed 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -1,9 +1,13 @@
 import os
+from contextvars import ContextVar
 from typing import Dict, Optional
 
 from llama_index.core.multi_modal_llms import MultiModalLLM
 from llama_index.core.settings import Settings
 
-# Singleton for multi-modal LLM
-multi_modal_llm: Optional[MultiModalLLM] = None
+multi_modal_llm: ContextVar[Optional[MultiModalLLM]] = ContextVar(
+    "multi_modal_llm", default=None
+)
 
 
 def init_settings():
@@ -76,8 +78,7 @@ def init_openai():
     )
 
     if model_name in GPT4V_MODELS:
-        global multi_modal_llm
-        multi_modal_llm = OpenAIMultiModal(model=model_name)
+        multi_modal_llm.set(OpenAIMultiModal(model=model_name))
 
     dimensions = os.getenv("EMBEDDING_DIM")
     Settings.embed_model = OpenAIEmbedding(
         model=os.getenv("EMBEDDING_MODEL", "text-embedding-3-small"),

From fa17d39e22832bf0176f7a4b27f4212d2c66f220 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 12:15:47 +0700
Subject: [PATCH 07/17] add synthesize

---
 .../python/agent/tools/query_engine.py | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 905d20df9..542541bf0 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -39,6 +39,38 @@
         self._multi_modal_llm = multimodal_model
         self._text_qa_template = text_qa_template or DEFAULT_TREE_SUMMARIZE_PROMPT_SEL
 
+    def synthesize(
+        self,
+        query: QueryTextType,
+        nodes: List[NodeWithScore],
+        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
+        **response_kwargs: Any,
+    ) -> RESPONSE_TYPE:
+        image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
+
+        # Summarize the text nodes to avoid exceeding the token limit
+        text_response = str(super().synthesize(query, nodes))
+
+        fmt_prompt = self._text_qa_template.format(
+            context_str=text_response,
+            query_str=query.query_str,  # type: ignore
+        )
+
+        llm_response = self._multi_modal_llm.complete(
+            prompt=fmt_prompt,
+            image_documents=[
+                image_node.node
+                for image_node in image_nodes
+                if isinstance(image_node.node, ImageNode)
+            ],
+        )
+
+        return Response(
+            response=str(llm_response),
+            source_nodes=nodes,
+            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
+        )
+
     async def asynthesize(
         self,
         query: QueryTextType,
@@ -93,6 +125,7 @@
         mm_model = multi_modal_llm.get()
         if mm_model:
             kwargs["retrieve_image_nodes"] = True
+            print("Using multi-modal model")
             return RetrieverQueryEngine(
                 retriever=index.as_retriever(**kwargs),
                 response_synthesizer=MultiModalSynthesizer(
                     multimodal_model=mm_model
                 ),
             )

From 0ffb7ff4e1041d4e8b90202be140d50f3519ba36 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 13:18:24 +0700
Subject: [PATCH 08/17] use getter

---
 .../python/agent/tools/query_engine.py       | 43 +++----------
 .../components/settings/python/settings.py   | 14 +++---
 2 files changed, 14 insertions(+), 43 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 542541bf0..6f664fe80 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -20,7 +20,7 @@
 )
 from llama_index.core.tools.query_engine import QueryEngineTool
 
-from app.settings import multi_modal_llm
+from app.settings import get_multi_modal_llm
 
 
 class MultiModalSynthesizer(TreeSummarize):
@@ -39,38 +39,6 @@
         self._multi_modal_llm = multimodal_model
         self._text_qa_template = text_qa_template or DEFAULT_TREE_SUMMARIZE_PROMPT_SEL
 
-    def synthesize(
-        self,
-        query: QueryTextType,
-        nodes: List[NodeWithScore],
-        additional_source_nodes: Optional[Sequence[NodeWithScore]] = None,
-        **response_kwargs: Any,
-    ) -> RESPONSE_TYPE:
-        image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
-
-        # Summarize the text nodes to avoid exceeding the token limit
-        text_response = str(super().synthesize(query, nodes))
-
-        fmt_prompt = self._text_qa_template.format(
-            context_str=text_response,
-            query_str=query.query_str,  # type: ignore
-        )
-
-        llm_response = self._multi_modal_llm.complete(
-            prompt=fmt_prompt,
-            image_documents=[
-                image_node.node
-                for image_node in image_nodes
-                if isinstance(image_node.node, ImageNode)
-            ],
-        )
-
-        return Response(
-            response=str(llm_response),
-            source_nodes=nodes,
-            metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
-        )
-
     async def asynthesize(
         self,
         query: QueryTextType,
@@ -125,17 +93,14 @@
         retrieval_mode = kwargs.get("retrieval_mode")
         if retrieval_mode is None:
             kwargs["retrieval_mode"] = "auto_routed"
-        mm_model = multi_modal_llm.get()
-        if mm_model:
+        multi_modal_llm = get_multi_modal_llm()
+        if multi_modal_llm:
             kwargs["retrieve_image_nodes"] = True
-            print("Using multi-modal model")
             return RetrieverQueryEngine(
                 retriever=index.as_retriever(**kwargs),
                 response_synthesizer=MultiModalSynthesizer(
-                    multimodal_model=mm_model
+                    multimodal_model=multi_modal_llm
                 ),
             )
 
-    return index.as_query_engine(**kwargs)
+    raise ValueError("Multi-modal LLM is not set")
diff --git a/templates/components/settings/python/settings.py b/templates/components/settings/python/settings.py
index abbc696ed..ff647560c 100644
--- a/templates/components/settings/python/settings.py
+++ b/templates/components/settings/python/settings.py
@@ -1,13 +1,16 @@
 import os
-from contextvars import ContextVar
 from typing import Dict, Optional
 
 from llama_index.core.multi_modal_llms import MultiModalLLM
 from llama_index.core.settings import Settings
 
-multi_modal_llm: ContextVar[Optional[MultiModalLLM]] = ContextVar(
-    "multi_modal_llm", default=None
-)
+# `Settings` does not support setting `MultiModalLLM`
+# so we use a global variable to store it
+_multi_modal_llm: Optional[MultiModalLLM] = None
+
+
+def get_multi_modal_llm():
+    return _multi_modal_llm
 
 
 def init_settings():
@@ -78,7 +81,8 @@ def init_openai():
     )
 
     if model_name in GPT4V_MODELS:
-        multi_modal_llm.set(OpenAIMultiModal(model=model_name))
+        global _multi_modal_llm
+        _multi_modal_llm = OpenAIMultiModal(model=model_name)
 
     dimensions = os.getenv("EMBEDDING_DIM")
     Settings.embed_model = OpenAIEmbedding(

From b9d336c61057f0945b3af7f5bc8d3781510083a3 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 15:02:33 +0700
Subject: [PATCH 09/17] remove wrong code

---
 .../components/engines/python/agent/tools/query_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 6f664fe80..c035975a0 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -53,7 +53,7 @@
         fmt_prompt = self._text_qa_template.format(
             context_str=text_response,
-            query_str=query.query_str,  # type: ignore
+            query_str=query.query_str,
         )
@@ -100,7 +100,7 @@
             ),
         )
 
-    raise ValueError("Multi-modal LLM is not set")
+    return index.as_query_engine(**kwargs)

From 235159e057974167b4148be9b9a05f0d6050afed Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 15:20:30 +0700
Subject: [PATCH 10/17] improve code

---
 .../python/agent/tools/query_engine.py | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index c035975a0..fa8e71b31 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,19 +1,19 @@
 import os
 from typing import Any, List, Optional, Sequence
 
+from llama_index.core import get_response_synthesizer
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
 from llama_index.core.multi_modal_llms import MultiModalLLM
 from llama_index.core.prompts.base import BasePromptTemplate
 from llama_index.core.prompts.default_prompt_selectors import (
-    DEFAULT_TREE_SUMMARIZE_PROMPT_SEL,
+    DEFAULT_TEXT_QA_PROMPT_SEL,
 )
 from llama_index.core.query_engine import (
     RetrieverQueryEngine,
 )
 from llama_index.core.query_engine.multi_modal import _get_image_and_text_nodes
-from llama_index.core.response_synthesizers import TreeSummarize
-from llama_index.core.response_synthesizers.base import QueryTextType
+from llama_index.core.response_synthesizers.base import BaseSynthesizer, QueryTextType
 from llama_index.core.schema import (
     ImageNode,
     NodeWithScore,
 )
 from llama_index.core.tools.query_engine import QueryEngineTool
 
 from app.settings import get_multi_modal_llm
 
 
-class MultiModalSynthesizer(TreeSummarize):
+class MultiModalSynthesizer(BaseSynthesizer):
     """
     A synthesizer that summarizes text nodes and uses a multi-modal LLM to generate a response.
""" def __init__( self, - multimodal_model: Optional[MultiModalLLM] = None, + multimodal_model: MultiModalLLM, + response_synthesizer: Optional[BaseSynthesizer], text_qa_template: Optional[BasePromptTemplate] = None, *args, **kwargs, ): super().__init__(*args, **kwargs) self._multi_modal_llm = multimodal_model - self._text_qa_template = text_qa_template or DEFAULT_TREE_SUMMARIZE_PROMPT_SEL + self._response_synthesizer = response_synthesizer + self._text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT_SEL async def asynthesize( self, @@ -49,11 +51,13 @@ async def asynthesize( image_nodes, text_nodes = _get_image_and_text_nodes(nodes) # Summarize the text nodes to avoid exceeding the token limit - text_response = str(await super().asynthesize(query, nodes)) + text_response = str( + await self._response_synthesizer.asynthesize(query, text_nodes) + ) fmt_prompt = self._text_qa_template.format( context_str=text_response, - query_str=query.query_str, + query_str=query.query_str, # type: ignore ) llm_response = await self._multi_modal_llm.acomplete( @@ -90,13 +94,13 @@ def create_query_engine(index, **kwargs) -> BaseQueryEngine: retrieval_mode = kwargs.get("retrieval_mode") if retrieval_mode is None: kwargs["retrieval_mode"] = "auto_routed" - multi_modal_llm = get_multi_modal_llm() - if multi_modal_llm: + if get_multi_modal_llm(): kwargs["retrieve_image_nodes"] = True return RetrieverQueryEngine( retriever=index.as_retriever(**kwargs), response_synthesizer=MultiModalSynthesizer( - multimodal_model=multi_modal_llm + multimodal_model=get_multi_modal_llm(), + response_synthesizer=get_response_synthesizer(), ), ) From 3b14f59cc1ee166837d4ff9732a5bfbef182b83b Mon Sep 17 00:00:00 2001 From: leehuwuj Date: Fri, 29 Nov 2024 15:26:27 +0700 Subject: [PATCH 11/17] fix missing abstract methods --- .../python/agent/tools/query_engine.py | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py index fa8e71b31..c30f4234c 100644 --- a/templates/components/engines/python/agent/tools/query_engine.py +++ b/templates/components/engines/python/agent/tools/query_engine.py @@ -1,5 +1,5 @@ import os -from typing import Any, List, Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence from llama_index.core import get_response_synthesizer from llama_index.core.base.base_query_engine import BaseQueryEngine @@ -19,6 +19,7 @@ NodeWithScore, ) from llama_index.core.tools.query_engine import QueryEngineTool +from llama_index.core.types import RESPONSE_TEXT_TYPE from app.settings import get_multi_modal_llm @@ -41,6 +42,25 @@ def __init__( self._response_synthesizer = response_synthesizer self._text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT_SEL + def _get_prompts(self, **kwargs) -> Dict[str, Any]: + return { + "text_qa_template": self._text_qa_template, + } + + def _update_prompts(self, prompts: Dict[str, Any]) -> None: + if "text_qa_template" in prompts: + self._text_qa_template = prompts["text_qa_template"] + + async def aget_response( + self, + *args, + **response_kwargs: Any, + ) -> RESPONSE_TEXT_TYPE: + return await self._response_synthesizer.aget_response(*args, **response_kwargs) + + def get_response(self, *args, **kwargs) -> RESPONSE_TEXT_TYPE: + return self._response_synthesizer.get_response(*args, **kwargs) + async def asynthesize( self, query: QueryTextType, From 81a8bfe4946f39350e36821c8f479ac3761439de Mon Sep 17 
00:00:00 2001 From: leehuwuj Date: Fri, 29 Nov 2024 15:28:10 +0700 Subject: [PATCH 12/17] fix missing abstract methods --- .../python/agent/tools/query_engine.py | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py index c30f4234c..7ff49fb04 100644 --- a/templates/components/engines/python/agent/tools/query_engine.py +++ b/templates/components/engines/python/agent/tools/query_engine.py @@ -95,6 +95,38 @@ async def asynthesize( metadata={"text_nodes": text_nodes, "image_nodes": image_nodes}, ) + def synthesize( + self, + query: QueryTextType, + nodes: List[NodeWithScore], + additional_source_nodes: Optional[Sequence[NodeWithScore]] = None, + **response_kwargs: Any, + ) -> RESPONSE_TYPE: + image_nodes, text_nodes = _get_image_and_text_nodes(nodes) + + # Summarize the text nodes to avoid exceeding the token limit + text_response = str(self._response_synthesizer.synthesize(query, text_nodes)) + + fmt_prompt = self._text_qa_template.format( + context_str=text_response, + query_str=query.query_str, # type: ignore + ) + + llm_response = self._multi_modal_llm.complete( + prompt=fmt_prompt, + image_documents=[ + image_node.node + for image_node in image_nodes + if isinstance(image_node.node, ImageNode) + ], + ) + + return Response( + response=str(llm_response), + source_nodes=nodes, + metadata={"text_nodes": text_nodes, "image_nodes": image_nodes}, + ) + def create_query_engine(index, **kwargs) -> BaseQueryEngine: """ From 2e8e89a9b419ba731935cd262cbaadc0151fbf5d Mon Sep 17 00:00:00 2001 From: leehuwuj Date: Fri, 29 Nov 2024 15:36:34 +0700 Subject: [PATCH 13/17] use MultiModalSynthesizer as default --- .../python/agent/tools/query_engine.py | 126 +++++++++--------- 1 file changed, 64 insertions(+), 62 deletions(-) diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py index 7ff49fb04..64276b160 100644 --- a/templates/components/engines/python/agent/tools/query_engine.py +++ b/templates/components/engines/python/agent/tools/query_engine.py @@ -9,9 +9,6 @@ from llama_index.core.prompts.default_prompt_selectors import ( DEFAULT_TEXT_QA_PROMPT_SEL, ) -from llama_index.core.query_engine import ( - RetrieverQueryEngine, -) from llama_index.core.query_engine.multi_modal import _get_image_and_text_nodes from llama_index.core.response_synthesizers.base import BaseSynthesizer, QueryTextType from llama_index.core.schema import ( @@ -24,6 +21,64 @@ from app.settings import get_multi_modal_llm +def create_query_engine(index, **kwargs) -> BaseQueryEngine: + """ + Create a query engine for the given index. + + Args: + index: The index to create a query engine for. 
+        params (optional): Additional parameters for the query engine, e.g.: similarity_top_k
+    """
+
+    top_k = int(os.getenv("TOP_K", 0))
+    if top_k != 0 and kwargs.get("filters") is None:
+        kwargs["similarity_top_k"] = top_k
+    multimodal_llm = get_multi_modal_llm()
+    if multimodal_llm:
+        kwargs["response_synthesizer"] = MultiModalSynthesizer(
+            multimodal_model=multimodal_llm,
+            response_synthesizer=get_response_synthesizer(),
+        )
+
+    # If the index is a LlamaCloudIndex,
+    # use auto_routed mode for better query results
+    if index.__class__.__name__ == "LlamaCloudIndex":
+        retrieval_mode = kwargs.get("retrieval_mode")
+        if retrieval_mode is None:
+            kwargs["retrieval_mode"] = "auto_routed"
+            if multimodal_llm:
+                kwargs["retrieve_image_nodes"] = True
+    return index.as_query_engine(**kwargs)
+
+
+def get_query_engine_tool(
+    index,
+    name: Optional[str] = None,
+    description: Optional[str] = None,
+    **kwargs,
+) -> QueryEngineTool:
+    """
+    Get a query engine tool for the given index.
+
+    Args:
+        index: The index to create a query engine for.
+        name (optional): The name of the tool.
+        description (optional): The description of the tool.
+    """
+    if name is None:
+        name = "query_index"
+    if description is None:
+        description = (
+            "Use this tool to retrieve information about the text corpus from an index."
+        )
+    query_engine = create_query_engine(index, **kwargs)
+    return QueryEngineTool.from_defaults(
+        query_engine=query_engine,
+        name=name,
+        description=description,
+    )
+
+
 class MultiModalSynthesizer(BaseSynthesizer):
     """
     A synthesizer that summarizes text nodes and uses a multi-modal LLM to generate a response.
@@ -125,6 +125,9 @@
     ) -> RESPONSE_TYPE:
         image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
 
+        if len(image_nodes) == 0:
+            return await self._response_synthesizer.asynthesize(query, text_nodes)
+
         # Summarize the text nodes to avoid exceeding the token limit
         text_response = str(
             await self._response_synthesizer.asynthesize(query, text_nodes)
         )
@@ -162,6 +162,9 @@
     ) -> RESPONSE_TYPE:
         image_nodes, text_nodes = _get_image_and_text_nodes(nodes)
 
+        if len(image_nodes) == 0:
+            return self._response_synthesizer.synthesize(query, text_nodes)
+
         # Summarize the text nodes to avoid exceeding the token limit
         text_response = str(self._response_synthesizer.synthesize(query, text_nodes))
 
@@ -187,62 +187,3 @@
             source_nodes=nodes,
             metadata={"text_nodes": text_nodes, "image_nodes": image_nodes},
         )
-
-
-def create_query_engine(index, **kwargs) -> BaseQueryEngine:
-    """
-    Create a query engine for the given index.
-
-    Args:
-        index: The index to create a query engine for.
-        params (optional): Additional parameters for the query engine, e.g.: similarity_top_k
-    """
-
-    top_k = int(os.getenv("TOP_K", 0))
-    if top_k != 0 and kwargs.get("filters") is None:
-        kwargs["similarity_top_k"] = top_k
-    # If the index is a LlamaCloudIndex,
-    # use auto_routed mode for better query results
-    if index.__class__.__name__ == "LlamaCloudIndex":
-        retrieval_mode = kwargs.get("retrieval_mode")
-        if retrieval_mode is None:
-            kwargs["retrieval_mode"] = "auto_routed"
-        if get_multi_modal_llm():
-            kwargs["retrieve_image_nodes"] = True
-            return RetrieverQueryEngine(
-                retriever=index.as_retriever(**kwargs),
-                response_synthesizer=MultiModalSynthesizer(
-                    multimodal_model=get_multi_modal_llm(),
-                    response_synthesizer=get_response_synthesizer(),
-                ),
-            )
-
-    return index.as_query_engine(**kwargs)
-
-
-def get_query_engine_tool(
-    index,
-    name: Optional[str] = None,
-    description: Optional[str] = None,
-    **kwargs,
-) -> QueryEngineTool:
-    """
-    Get a query engine tool for the given index.
-
-    Args:
-        index: The index to create a query engine for.
-        name (optional): The name of the tool.
-        description (optional): The description of the tool.
-    """
-    if name is None:
-        name = "query_index"
-    if description is None:
-        description = (
-            "Use this tool to retrieve information about the text corpus from an index."
-        )
-    query_engine = create_query_engine(index, **kwargs)
-    return QueryEngineTool.from_defaults(
-        query_engine=query_engine,
-        name=name,
-        description=description,
-    )

From 46096ba4cf78a85d65ed2e039b6c80f9535319b0 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 15:39:53 +0700
Subject: [PATCH 14/17] better code

---
 .../components/engines/python/agent/tools/query_engine.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 64276b160..8bfe3ff6a 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -37,7 +37,6 @@
     if multimodal_llm:
         kwargs["response_synthesizer"] = MultiModalSynthesizer(
             multimodal_model=multimodal_llm,
-            response_synthesizer=get_response_synthesizer(),
         )
 
     # If the index is a LlamaCloudIndex,
@@ -86,14 +85,14 @@
     def __init__(
         self,
         multimodal_model: MultiModalLLM,
-        response_synthesizer: Optional[BaseSynthesizer],
+        response_synthesizer: Optional[BaseSynthesizer] = None,
         text_qa_template: Optional[BasePromptTemplate] = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self._multi_modal_llm = multimodal_model
-        self._response_synthesizer = response_synthesizer
+        self._response_synthesizer = response_synthesizer or get_response_synthesizer()
         self._text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT_SEL

From a7f56a766e259fd2f9c90b3aafcc16ef7a4c3371 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 15:49:02 +0700
Subject: [PATCH 15/17] simplify code

---
 .../engines/python/agent/tools/query_engine.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 8bfe3ff6a..8c3c1b1d7 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,7 +1,6 @@
 import os
 from typing import Any, Dict, List, Optional, Sequence
 
-from llama_index.core import get_response_synthesizer
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
 from llama_index.core.multi_modal_llms import MultiModalLLM
@@ -42,11 +41,10 @@
     # If the index is a LlamaCloudIndex,
     # use auto_routed mode for better query results
     if index.__class__.__name__ == "LlamaCloudIndex":
-        retrieval_mode = kwargs.get("retrieval_mode")
-        if retrieval_mode is None:
+        if kwargs.get("retrieval_mode") is None:
             kwargs["retrieval_mode"] = "auto_routed"
-            if multimodal_llm:
-                kwargs["retrieve_image_nodes"] = True
+        if multimodal_llm:
+            kwargs["retrieve_image_nodes"] = True
     return index.as_query_engine(**kwargs)
 
@@ -86,14 +84,14 @@
     def __init__(
         self,
         multimodal_model: MultiModalLLM,
-        response_synthesizer: Optional[BaseSynthesizer] = None,
+        response_synthesizer: Optional[BaseSynthesizer],
         text_qa_template: Optional[BasePromptTemplate] = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self._multi_modal_llm = multimodal_model
-        self._response_synthesizer = response_synthesizer or get_response_synthesizer()
+        self._response_synthesizer = response_synthesizer
         self._text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT_SEL

From af026747f6717ef0c7484140374b7a3e62a91b09 Mon Sep 17 00:00:00 2001
From: leehuwuj
Date: Fri, 29 Nov 2024 15:59:22 +0700
Subject: [PATCH 16/17] fix wrong params

---
 .../components/engines/python/agent/tools/query_engine.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/templates/components/engines/python/agent/tools/query_engine.py b/templates/components/engines/python/agent/tools/query_engine.py
index 8c3c1b1d7..396fb1d6e 100644
--- a/templates/components/engines/python/agent/tools/query_engine.py
+++ b/templates/components/engines/python/agent/tools/query_engine.py
@@ -1,6 +1,7 @@
 import os
 from typing import Any, Dict, List, Optional, Sequence
 
+from llama_index.core import get_response_synthesizer
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.response.schema import RESPONSE_TYPE, Response
 from llama_index.core.multi_modal_llms import MultiModalLLM
@@ -85,14 +85,14 @@
     def __init__(
         self,
         multimodal_model: MultiModalLLM,
-        response_synthesizer: Optional[BaseSynthesizer],
+        response_synthesizer: Optional[BaseSynthesizer] = None,
         text_qa_template: Optional[BasePromptTemplate] = None,
         *args,
         **kwargs,
     ):
         super().__init__(*args, **kwargs)
         self._multi_modal_llm = multimodal_model
-        self._response_synthesizer = response_synthesizer
+        self._response_synthesizer = response_synthesizer or get_response_synthesizer()
         self._text_qa_template = text_qa_template or DEFAULT_TEXT_QA_PROMPT_SEL

From 7516b06951b6f9ab7fe28488ff040d01a20b105f Mon Sep 17 00:00:00 2001
From: Marcus Schiesser
Date: Fri, 29 Nov 2024 18:00:49 +0700
Subject: [PATCH 17/17] docs: changeset

---
 .changeset/blue-hornets-boil.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/blue-hornets-boil.md

diff --git a/.changeset/blue-hornets-boil.md b/.changeset/blue-hornets-boil.md
new file mode 100644
index 000000000..e8c2928d4
--- /dev/null
+++ b/.changeset/blue-hornets-boil.md
@@ -0,0 +1,5 @@
+---
+"create-llama": patch
+---
+
+Add support for multimodal indexes (e.g. from LlamaCloud)
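
The sketch below shows how the query path that this patch series converges on might be exercised from a generated backend. It is a minimal, hypothetical example: the module paths app.settings and app.engine.tools.query_engine, the data/ directory, and the environment values are assumptions for illustration (the template files above are copied into a generated project's app package, where exact paths may differ), not part of the patches themselves.

    import os

    from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

    # Assumed import locations in a generated project (see note above).
    from app.settings import get_multi_modal_llm, init_settings
    from app.engine.tools.query_engine import get_query_engine_tool

    # "gpt-4o" is assumed to be in GPT4V_MODELS, so init_openai() also stores
    # the module-level OpenAIMultiModal that create_query_engine() picks up.
    os.environ["MODEL_PROVIDER"] = "openai"
    os.environ["MODEL"] = "gpt-4o"

    init_settings()
    assert get_multi_modal_llm() is not None  # the multimodal path is active

    # Build a local index and wrap it in the query-engine tool; with the
    # multi-modal LLM set, answers are produced by MultiModalSynthesizer,
    # which first tree-summarizes the text nodes and then sends the summary
    # plus any image nodes to the multi-modal model.
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    tool = get_query_engine_tool(index)
    response = tool.query_engine.query("What do the images in the corpus show?")
    print(response)

With a LlamaCloudIndex in place of the local VectorStoreIndex, create_query_engine() additionally defaults retrieval_mode to "auto_routed" and sets retrieve_image_nodes=True, as introduced in PATCH 13 and refined in PATCH 15.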