feat: Make suggest next questions configurable #275

Merged
merged 10 commits on Sep 9, 2024
Changes from 4 commits
5 changes: 5 additions & 0 deletions .changeset/cyan-buttons-clean.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add env config for next questions feature
51 changes: 23 additions & 28 deletions helpers/env-variables.ts
@@ -486,34 +486,29 @@ It\\'s cute animal.
return systemPromptEnv;
};

const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
if (template === "multiagent") {
return [
{
name: "MESSAGE_QUEUE_PORT",
},
{
name: "CONTROL_PLANE_PORT",
},
{
name: "HUMAN_CONSUMER_PORT",
},
{
name: "AGENT_QUERY_ENGINE_PORT",
value: "8003",
},
{
name: "AGENT_QUERY_ENGINE_DESCRIPTION",
value: "Query information from the provided data",
},
{
name: "AGENT_DUMMY_PORT",
value: "8004",
},
];
} else {
return [];
const getTemplateEnvs = (
template?: TemplateType,
framework?: TemplateFramework,
): EnvVar[] => {
const nextQuestionEnvs: EnvVar[] = [
{
name: "NEXT_QUESTION_PROMPT",
description: `Customize prompt to generate the next question suggestions based on the conversation history.
Disable this prompt to disable the next question suggestions feature.`,
value: `"You're a helpful assistant! Your task is to suggest the next question that user might ask.
Here is the conversation history
---------------------\n{conversation}\n---------------------
Given the conversation history, please give me 3 questions that you might ask next!"`,
},
];

if (
framework === "fastapi" &&
(template === "multiagent" || template === "streaming")
) {
return nextQuestionEnvs;
}
return [];

Review of getTemplateEnvs function modifications

The modifications to the getTemplateEnvs function introduce a new environment variable NEXT_QUESTION_PROMPT and extend the function's capability to handle different templates. Here are some observations and suggestions:

  1. Correctness: The function correctly checks the framework and template conditions before returning nextQuestionEnvs, so the environment variables are only added when needed and other scenarios are not cluttered with extra configuration.

  2. Maintainability: The function is straightforward and maintains readability with clear condition checks and structured environment variable definitions. However, consider adding more comments explaining why only "multiagent" and "streaming" templates are considered here.

  3. Performance: There are no performance concerns with the current implementation as the operations are primarily conditional checks and array manipulations.

  4. Best Practices: The use of template literals and structured data for environment variables is a good practice. It keeps the code clean and easy to understand.

  5. Potential Improvements:

    • Extensibility: If more templates or conditions are expected in the future, consider refactoring this into a more scalable solution, such as using a mapping object or a switch-case structure to handle different templates.
    • Error Handling: Currently, the function does not handle potential errors or unexpected values for template or framework. Adding error logging or warnings for unexpected values could improve robustness.

Overall, the changes align well with the PR objectives and improve the configurability of the next questions feature.

Consider the following refactor for future extensibility and error handling:

// Potential refactor for handling multiple templates with error logging.
// Assumes nextQuestionEnvs is hoisted to module scope so it can be referenced here.
const TEMPLATE_ENV_CONFIGS: Partial<Record<TemplateType, EnvVar[]>> = {
  multiagent: nextQuestionEnvs,
  streaming: nextQuestionEnvs,
  // Add more templates as needed
};

function getTemplateEnvs(
  template?: TemplateType,
  framework?: TemplateFramework,
): EnvVar[] {
  const envs = template ? TEMPLATE_ENV_CONFIGS[template] : undefined;
  if (framework === "fastapi" && envs) {
    return envs;
  }
  console.warn(`Unsupported template or framework: ${template}, ${framework}`);
  return [];
}

};

const getObservabilityEnvs = (
@@ -560,7 +555,7 @@ export const createBackendEnvFile = async (
...getVectorDBEnvs(opts.vectorDb, opts.framework),
...getFrameworkEnvs(opts.framework, opts.externalPort),
...getToolEnvs(opts.tools),
...getTemplateEnvs(opts.template),
...getTemplateEnvs(opts.template, opts.framework),
...getObservabilityEnvs(opts.observability),
...getSystemPromptEnv(opts.tools, opts.dataSources, opts.framework),
];
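For context (illustrative only, not part of the diff): the generated variable is read by the Python backend with os.getenv, so removing NEXT_QUESTION_PROMPT from the generated .env disables the feature. A minimal sketch of that contract:

import os

# Illustrative sketch: NEXT_QUESTION_PROMPT is written to the generated .env by
# getTemplateEnvs above; when it is unset or empty, the feature is disabled.
prompt_template = os.getenv("NEXT_QUESTION_PROMPT")

if not prompt_template:
    print("Next question suggestions are disabled")
else:
    # The default template carries a {conversation} placeholder filled per request.
    print(prompt_template.format(conversation="user: Hi\nassistant: Hello!"))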
7 changes: 7 additions & 0 deletions helpers/python.ts
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "settings", "python"),
});

// Copy services
if (template == "streaming" || template == "multiagent") {
await copy("**", path.join(root, "app", "api", "services"), {
cwd: path.join(compPath, "services", "python"),
});
}

if (template === "streaming") {
// For the streaming template only:
// Select and copy engine code based on data sources and tools
File renamed without changes.
@@ -1,20 +1,12 @@
import logging
from typing import List
import os
from typing import List, Optional

from app.api.routers.models import Message
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from pydantic import BaseModel

NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
"You're a helpful assistant! Your task is to suggest the next question that user might ask. "
"\nHere is the conversation history"
"\n---------------------\n{conversation}\n---------------------"
"Given the conversation history, please give me {number_of_questions} questions that you might ask next!"
)
N_QUESTION_TO_GENERATE = 3


logger = logging.getLogger("uvicorn")


@@ -25,15 +17,24 @@ class NextQuestions(BaseModel):


class NextQuestionSuggestion:
@staticmethod

@classmethod
def get_configured_prompt(cls) -> Optional[str]:
return os.getenv("NEXT_QUESTION_PROMPT", None)

@classmethod
async def suggest_next_questions(
cls,
messages: List[Message],
number_of_questions: int = N_QUESTION_TO_GENERATE,
) -> List[str]:
) -> Optional[List[str]]:
"""
Suggest the next questions that user might ask based on the conversation history
Return as empty list if there is an error
Return None if suggestion is disabled or there is an error
"""
prompt_template = cls.get_configured_prompt()
if not prompt_template:
return None

try:
# Reduce the cost by only using the last two messages
last_user_message = None
@@ -49,12 +50,11 @@ async def suggest_next_questions(

output: NextQuestions = await Settings.llm.astructured_predict(
NextQuestions,
prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
prompt=PromptTemplate(prompt_template),
conversation=conversation,
number_of_questions=number_of_questions,
)

return output.questions
except Exception as e:
logger.error(f"Error when generating next question: {e}")
return []
return None
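
As an illustrative usage sketch (not part of the diff), the updated service could be exercised like this, assuming Settings.llm is configured and NEXT_QUESTION_PROMPT is set:

import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion


async def demo() -> None:
    messages = [
        Message(role="user", content="What does this project do?"),
        Message(role="assistant", content="It scaffolds a LlamaIndex-powered app."),
    ]
    # Returns None when NEXT_QUESTION_PROMPT is unset (feature disabled) or on error;
    # otherwise a list of suggested follow-up questions.
    questions = await NextQuestionSuggestion.suggest_next_questions(messages)
    print(questions)


# asyncio.run(demo())  # requires an LLM configured on llama_index.core Settings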
@@ -1,15 +1,15 @@
from asyncio import Task
import json
import logging
from asyncio import Task
from typing import AsyncGenerator

from aiostream import stream
from app.agents.single import AgentRunEvent, AgentRunResult
from app.api.routers.models import ChatData, Message
from app.api.services.suggestion import NextQuestionSuggestion
from fastapi import Request
from fastapi.responses import StreamingResponse

from app.api.routers.models import ChatData
from app.agents.single import AgentRunEvent, AgentRunResult

logger = logging.getLogger("uvicorn")


@@ -57,16 +57,32 @@ async def content_generator(
# Yield the text response
async def _chat_response_generator():
result = await task
final_response = ""

if isinstance(result, AgentRunResult):
for token in result.response.message.content:
yield VercelStreamResponse.convert_text(token)

if isinstance(result, AsyncGenerator):
async for token in result:
final_response += token.delta
yield VercelStreamResponse.convert_text(token.delta)

# TODO: stream NextQuestionSuggestion
# Generate next questions if next question prompt is configured
if NextQuestionSuggestion.get_configured_prompt() is not None:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)
# TODO: stream sources

# Yield the events from the event handler
1 change: 1 addition & 0 deletions templates/types/multiagent/fastapi/pyproject.toml
@@ -16,6 +16,7 @@ llama-index = "^0.11.4"
fastapi = "^0.112.2"
python-dotenv = "^1.0.0"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
aiostream = "^0.5.2"

@@ -56,20 +56,21 @@ async def _chat_response_generator():
final_response += token
yield VercelStreamResponse.convert_text(token)

# Generate questions that user might interested to
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if len(questions) > 0:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
# Generate next questions if next question prompt is configured
if NextQuestionSuggestion.get_configured_prompt() is not None:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)

# the text_generator is the leading stream, once it's finished, also finish the event stream
event_handler.is_done = True
3 changes: 2 additions & 1 deletion templates/types/streaming/fastapi/pyproject.toml
@@ -14,8 +14,9 @@ fastapi = "^0.109.1"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
python-dotenv = "^1.0.0"
aiostream = "^0.5.2"
llama-index = "0.11.6"
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
llama-index = "0.11.6"

[build-system]
requires = ["poetry-core"]