feat: Make suggest next questions configurable #275

Merged: 10 commits, Sep 9, 2024
Changes from 3 commits
5 changes: 5 additions & 0 deletions .changeset/cyan-buttons-clean.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add env config for next questions feature
47 changes: 21 additions & 26 deletions helpers/env-variables.ts
@@ -487,33 +487,28 @@ It\\'s cute animal.
};

const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
if (template === "multiagent") {
return [
{
name: "MESSAGE_QUEUE_PORT",
},
{
name: "CONTROL_PLANE_PORT",
},
{
name: "HUMAN_CONSUMER_PORT",
},
{
name: "AGENT_QUERY_ENGINE_PORT",
value: "8003",
},
{
name: "AGENT_QUERY_ENGINE_DESCRIPTION",
value: "Query information from the provided data",
},
{
name: "AGENT_DUMMY_PORT",
value: "8004",
},
];
} else {
return [];
const nextQuestionEnvs: EnvVar[] = [
{
name: "NEXT_QUESTION_ENABLE",
description: "Whether to show next question suggestions",
value: "true",
},
{
name: "NEXT_QUESTION_PROMPT",
description: `Customize prompt to generate the next question suggestions based on the conversation history.
Default prompt is:
NEXT_QUESTION_PROMPT=# You're a helpful assistant! Your task is to suggest the next question that user might ask.
# Here is the conversation history
# ---------------------\n{conversation}\n---------------------
# Given the conversation history, please give me 3 questions that you might ask next!
`,
},
];

if (template === "multiagent" || template === "streaming") {
return nextQuestionEnvs;
}
return [];
};

const getObservabilityEnvs = (
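For reference, a minimal sketch (not part of this PR) of setting the two variables documented above. In a generated project they are written to the .env file, so the direct os.environ assignments here are purely illustrative, and the prompt text mirrors the default shown in the description:

```python
import os

# Illustrative only: create-llama writes these values to the generated project's .env.
os.environ["NEXT_QUESTION_ENABLE"] = "false"  # turn next-question suggestions off
os.environ["NEXT_QUESTION_PROMPT"] = (
    "You're a helpful assistant! Your task is to suggest the next question that user might ask.\n"
    "Here is the conversation history\n"
    "---------------------\n{conversation}\n---------------------\n"
    "Given the conversation history, please give me 3 questions that you might ask next!"
)
```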
7 changes: 7 additions & 0 deletions helpers/python.ts
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "settings", "python"),
});

// Copy services
if (template === "streaming" || template === "multiagent") {
await copy("**", path.join(root, "app", "api", "services"), {
cwd: path.join(compPath, "services", "python"),
});
}

if (template === "streaming") {
// For the streaming template only:
// Select and copy engine code based on data sources and tools
119 changes: 119 additions & 0 deletions templates/components/services/python/file.py
@@ -0,0 +1,119 @@
import base64
import mimetypes
import os
from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple

from app.engine.index import IndexConfig, get_index
from llama_index.core import VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.readers.file.base import (
_try_loading_included_file_formats as get_file_loaders_map,
)
from llama_index.core.schema import Document
from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
from llama_index.readers.file import FlatReader


def get_llamaparse_parser():
from app.engine.loaders import load_configs
from app.engine.loaders.file import FileLoaderConfig, llama_parse_parser

config = load_configs()
file_loader_config = FileLoaderConfig(**config["file"])
if file_loader_config.use_llama_parse:
return llama_parse_parser()
else:
return None


def default_file_loaders_map():
default_loaders = get_file_loaders_map()
default_loaders[".txt"] = FlatReader
return default_loaders


class PrivateFileService:
PRIVATE_STORE_PATH = "output/uploaded"

@staticmethod
def preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
header, data = base64_content.split(",", 1)
mime_type = header.split(";")[0].split(":", 1)[1]
extension = mimetypes.guess_extension(mime_type)
# File data as bytes
return base64.b64decode(data), extension

@staticmethod
def store_and_parse_file(file_name, file_data, extension) -> List[Document]:
# Store file to the private directory
os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))

# write file
with open(file_path, "wb") as f:
f.write(file_data)

# Load file to documents
# If LlamaParse is enabled, use it to parse the file
# Otherwise, use the default file loaders
reader = get_llamaparse_parser()
if reader is None:
reader_cls = default_file_loaders_map().get(extension)
if reader_cls is None:
raise ValueError(f"File extension {extension} is not supported")
reader = reader_cls()
documents = reader.load_data(file_path)
# Add custom metadata
for doc in documents:
doc.metadata["file_name"] = file_name
doc.metadata["private"] = "true"
return documents

@staticmethod
def process_file(file_name: str, base64_content: str, params: Any) -> List[str]:
file_data, extension = PrivateFileService.preprocess_base64_file(base64_content)

# Add the nodes to the index and persist it
index_config = IndexConfig(**params)
current_index = get_index(index_config)

# Insert the documents into the index
if isinstance(current_index, LlamaCloudIndex):
from app.engine.service import LLamaCloudFileService

project_id = current_index._get_project_id()
pipeline_id = current_index._get_pipeline_id()
# LlamaCloudIndex is a managed index so we can directly use the files
upload_file = (file_name, BytesIO(file_data))
return [
LLamaCloudFileService.add_file_to_pipeline(
project_id,
pipeline_id,
upload_file,
custom_metadata={
# Set private=true to mark the document as private user docs (required for filtering)
"private": "true",
},
)
]
else:
# First process documents into nodes
documents = PrivateFileService.store_and_parse_file(
file_name, file_data, extension
)
pipeline = IngestionPipeline()
nodes = pipeline.run(documents=documents)

# Add the nodes to the index and persist it
if current_index is None:
current_index = VectorStoreIndex(nodes=nodes)
else:
current_index.insert_nodes(nodes=nodes)
current_index.storage_context.persist(
persist_dir=os.environ.get("STORAGE_DIR", "storage")
)

# Return the document ids
return [doc.doc_id for doc in documents]
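A minimal usage sketch of the service above. The import path assumes the copy step in helpers/python.ts places this file at app/api/services/file.py, and the empty params dict is an assumption; a real project may need index-specific fields for IndexConfig:

```python
import base64

from app.api.services.file import PrivateFileService  # path assumed from helpers/python.ts

# preprocess_base64_file expects a data-URL style payload: "<header>,<base64 data>".
raw = b"hello from a private upload"
payload = "data:text/plain;base64," + base64.b64encode(raw).decode()

# `params` is unpacked into IndexConfig; {} is used purely for illustration.
doc_ids = PrivateFileService.process_file("notes.txt", payload, params={})
print(doc_ids)  # ids of the documents (or LlamaCloud files) added to the index
```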
73 changes: 73 additions & 0 deletions templates/components/services/python/suggestion.py
@@ -0,0 +1,73 @@
import logging
from typing import List, Optional

from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict

from app.api.routers.models import Message

logger = logging.getLogger("uvicorn")


class NextQuestionSettings(BaseSettings):
enable: bool = True
prompt_template: str = (
"You're a helpful assistant! Your task is to suggest the next question that user might ask. "
"\nHere is the conversation history"
"\n---------------------\n{conversation}\n---------------------"
"Given the conversation history, please give me 3 questions that you might ask next!"
)

model_config = SettingsConfigDict(env_prefix="NEXT_QUESTION_")

@property
def prompt(self) -> PromptTemplate:
return PromptTemplate(self.prompt_template)


next_question_settings = NextQuestionSettings()


class NextQuestions(BaseModel):
"""A list of questions that user might ask next"""

questions: List[str]


class NextQuestionSuggestion:
@staticmethod
async def suggest_next_questions(
messages: List[Message],
) -> Optional[List[str]]:
"""
Suggest the next questions that user might ask based on the conversation history
Return None if suggestion is disabled or there is an error
"""
if not next_question_settings.enable:
return None

try:
# Reduce the cost by only using the last two messages
last_user_message = None
last_assistant_message = None
for message in reversed(messages):
if message.role == "user":
last_user_message = f"User: {message.content}"
elif message.role == "assistant":
last_assistant_message = f"Assistant: {message.content}"
if last_user_message and last_assistant_message:
break
conversation: str = f"{last_user_message}\n{last_assistant_message}"

output: NextQuestions = await Settings.llm.astructured_predict(
NextQuestions,
prompt=next_question_settings.prompt,
conversation=conversation,
)

return output.questions
except Exception as e:
logger.error(f"Error when generating next question: {e}")
return None
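A minimal sketch of calling the helper above from an async context. It assumes the file is copied to app/api/services/suggestion.py and that Settings.llm has been configured by the app's startup code; the example messages are illustrative:

```python
import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion  # path assumed


async def main() -> None:
    # If Settings.llm is not configured, the try/except above logs the error
    # and the helper returns None.
    conversation = [
        Message(role="user", content="How do I upload a private file?"),
        Message(role="assistant", content="Use the upload endpoint; the file is marked private."),
    ]
    questions = await NextQuestionSuggestion.suggest_next_questions(conversation)
    print(questions)  # e.g. a list of 3 follow-up questions, or None


asyncio.run(main())
```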
@@ -7,8 +7,9 @@
from fastapi import Request
from fastapi.responses import StreamingResponse

from app.api.routers.models import ChatData
from app.api.routers.models import ChatData, Message
from app.agents.single import AgentRunEvent, AgentRunResult
from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings

logger = logging.getLogger("uvicorn")

@@ -57,16 +58,32 @@ async def content_generator(
# Yield the text response
async def _chat_response_generator():
result = await task
final_response = ""

if isinstance(result, AgentRunResult):
for token in result.response.message.content:
yield VercelStreamResponse.convert_text(token)

if isinstance(result, AsyncGenerator):
async for token in result:
final_response += token.delta
yield VercelStreamResponse.convert_text(token.delta)

# TODO: stream NextQuestionSuggestion
# Generate questions that user might be interested in
if next_question_settings.enable:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)
# TODO: stream sources

# Yield the events from the event handler
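For context, the data event yielded above carries a plain {"type": "suggested_questions", "data": [...]} payload. A hypothetical consumer-side sketch follows (handle_data_chunk and the example payload are assumptions; decoding of the stream framing produced by convert_data is assumed to have happened already):

```python
# Hypothetical consumer-side handling, not part of this PR. `chunk` is assumed
# to be the already-decoded JSON payload produced by convert_data above.
def handle_data_chunk(chunk: dict) -> None:
    if chunk.get("type") == "suggested_questions":
        for question in chunk.get("data", []):
            print(f"Suggested: {question}")


# Example payload mirroring what the generator yields:
handle_data_chunk(
    {
        "type": "suggested_questions",
        "data": ["What file formats are supported?", "Can I delete an uploaded file?"],
    }
)
```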
1 change: 1 addition & 0 deletions templates/types/multiagent/fastapi/pyproject.toml
@@ -16,6 +16,7 @@ llama-index = "^0.11.4"
fastapi = "^0.112.2"
python-dotenv = "^1.0.0"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
aiostream = "^0.5.2"

@@ -7,7 +7,7 @@

from app.api.routers.events import EventCallbackHandler
from app.api.routers.models import ChatData, Message, SourceNodes
from app.api.services.suggestion import NextQuestionSuggestion
from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings


class VercelStreamResponse(StreamingResponse):
@@ -56,20 +56,21 @@ async def _chat_response_generator():
final_response += token
yield VercelStreamResponse.convert_text(token)

# Generate questions that user might interested to
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if len(questions) > 0:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
# Generate questions that user might be interested in
if next_question_settings.enable:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)

# the text_generator is the leading stream, once it's finished, also finish the event stream
event_handler.is_done = True
3 changes: 2 additions & 1 deletion templates/types/streaming/fastapi/pyproject.toml
@@ -14,8 +14,9 @@ fastapi = "^0.109.1"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
python-dotenv = "^1.0.0"
aiostream = "^0.5.2"
llama-index = "0.11.6"
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
llama-index = "0.11.6"

[build-system]
requires = ["poetry-core"]