feat: Make suggest next questions configurable #275

Merged: 10 commits, Sep 9, 2024
Changes from 3 commits
5 changes: 5 additions & 0 deletions .changeset/cyan-buttons-clean.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Add env config for next questions feature
47 changes: 21 additions & 26 deletions helpers/env-variables.ts
@@ -487,33 +487,28 @@ It\\'s cute animal.
};

const getTemplateEnvs = (template?: TemplateType): EnvVar[] => {
if (template === "multiagent") {
return [
{
name: "MESSAGE_QUEUE_PORT",
},
{
name: "CONTROL_PLANE_PORT",
},
{
name: "HUMAN_CONSUMER_PORT",
},
{
name: "AGENT_QUERY_ENGINE_PORT",
value: "8003",
},
{
name: "AGENT_QUERY_ENGINE_DESCRIPTION",
value: "Query information from the provided data",
},
{
name: "AGENT_DUMMY_PORT",
value: "8004",
},
];
} else {
return [];
const nextQuestionEnvs: EnvVar[] = [
{
name: "NEXT_QUESTION_ENABLE",
description: "Whether to show next question suggestions",
value: "true",
},
{
name: "NEXT_QUESTION_PROMPT",
description: `Customize prompt to generate the next question suggestions based on the conversation history.
Default prompt is:
NEXT_QUESTION_PROMPT=# You're a helpful assistant! Your task is to suggest the next question that user might ask.
# Here is the conversation history
# ---------------------\n{conversation}\n---------------------
# Given the conversation history, please give me 3 questions that you might ask next!
`,
},
];

if (template === "multiagent" || template === "streaming") {
return nextQuestionEnvs;
}
return [];
};

const getObservabilityEnvs = (
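For reference, a minimal sketch (not part of this PR) of setting the two variables documented above. In a generated project they are written to the .env file, so the direct os.environ assignments here are purely illustrative, and the prompt text mirrors the default shown in the description:

```python
import os

# Illustrative only: create-llama writes these values to the generated project's .env.
os.environ["NEXT_QUESTION_ENABLE"] = "false"  # turn next-question suggestions off
os.environ["NEXT_QUESTION_PROMPT"] = (
    "You're a helpful assistant! Your task is to suggest the next question that user might ask.\n"
    "Here is the conversation history\n"
    "---------------------\n{conversation}\n---------------------\n"
    "Given the conversation history, please give me 3 questions that you might ask next!"
)
```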
7 changes: 7 additions & 0 deletions helpers/python.ts
@@ -395,6 +395,13 @@ export const installPythonTemplate = async ({
cwd: path.join(compPath, "settings", "python"),
});

// Copy services
if (template === "streaming" || template === "multiagent") {
await copy("**", path.join(root, "app", "api", "services"), {
cwd: path.join(compPath, "services", "python"),
});
}

if (template === "streaming") {
// For the streaming template only:
// Select and copy engine code based on data sources and tools
119 changes: 119 additions & 0 deletions templates/components/services/python/file.py
@@ -0,0 +1,119 @@
import base64
import mimetypes
import os
from io import BytesIO
from pathlib import Path
from typing import Any, List, Tuple

from app.engine.index import IndexConfig, get_index
from llama_index.core import VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.readers.file.base import (
_try_loading_included_file_formats as get_file_loaders_map,
)
from llama_index.core.schema import Document
from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
from llama_index.readers.file import FlatReader


def get_llamaparse_parser():
from app.engine.loaders import load_configs
from app.engine.loaders.file import FileLoaderConfig, llama_parse_parser

config = load_configs()
file_loader_config = FileLoaderConfig(**config["file"])
if file_loader_config.use_llama_parse:
return llama_parse_parser()
else:
return None


def default_file_loaders_map():
default_loaders = get_file_loaders_map()
default_loaders[".txt"] = FlatReader
return default_loaders


class PrivateFileService:
PRIVATE_STORE_PATH = "output/uploaded"

@staticmethod
def preprocess_base64_file(base64_content: str) -> Tuple[bytes, str | None]:
header, data = base64_content.split(",", 1)
mime_type = header.split(";")[0].split(":", 1)[1]
extension = mimetypes.guess_extension(mime_type)
# File data as bytes
return base64.b64decode(data), extension

@staticmethod
def store_and_parse_file(file_name, file_data, extension) -> List[Document]:
# Store file to the private directory
os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))

# write file
with open(file_path, "wb") as f:
f.write(file_data)

# Load file to documents
# If LlamaParse is enabled, use it to parse the file
# Otherwise, use the default file loaders
reader = get_llamaparse_parser()
if reader is None:
reader_cls = default_file_loaders_map().get(extension)
if reader_cls is None:
raise ValueError(f"File extension {extension} is not supported")
reader = reader_cls()
documents = reader.load_data(file_path)
# Add custom metadata
for doc in documents:
doc.metadata["file_name"] = file_name
doc.metadata["private"] = "true"
return documents

@staticmethod
def process_file(file_name: str, base64_content: str, params: Any) -> List[str]:
file_data, extension = PrivateFileService.preprocess_base64_file(base64_content)

# Add the nodes to the index and persist it
index_config = IndexConfig(**params)
current_index = get_index(index_config)

# Insert the documents into the index
if isinstance(current_index, LlamaCloudIndex):
from app.engine.service import LLamaCloudFileService

project_id = current_index._get_project_id()
pipeline_id = current_index._get_pipeline_id()
# LlamaCloudIndex is a managed index so we can directly use the files
upload_file = (file_name, BytesIO(file_data))
return [
LLamaCloudFileService.add_file_to_pipeline(
project_id,
pipeline_id,
upload_file,
custom_metadata={
# Set private=true to mark the document as private user docs (required for filtering)
"private": "true",
},
)
]
else:
# First process documents into nodes
documents = PrivateFileService.store_and_parse_file(
file_name, file_data, extension
)
pipeline = IngestionPipeline()
nodes = pipeline.run(documents=documents)

# Add the nodes to the index and persist it
if current_index is None:
current_index = VectorStoreIndex(nodes=nodes)
else:
current_index.insert_nodes(nodes=nodes)
current_index.storage_context.persist(
persist_dir=os.environ.get("STORAGE_DIR", "storage")
)

# Return the document ids
return [doc.doc_id for doc in documents]
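A minimal usage sketch of the service above. The import path assumes the copy step in helpers/python.ts places this file at app/api/services/file.py, and the empty params dict is an assumption; a real project may need index-specific fields for IndexConfig:

```python
import base64

from app.api.services.file import PrivateFileService  # path assumed from helpers/python.ts

# preprocess_base64_file expects a data-URL style payload: "<header>,<base64 data>".
raw = b"hello from a private upload"
payload = "data:text/plain;base64," + base64.b64encode(raw).decode()

# `params` is unpacked into IndexConfig; {} is used purely for illustration.
doc_ids = PrivateFileService.process_file("notes.txt", payload, params={})
print(doc_ids)  # ids of the documents (or LlamaCloud files) added to the index
```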
73 changes: 73 additions & 0 deletions templates/components/services/python/suggestion.py
@@ -0,0 +1,73 @@
import logging
from typing import List, Optional

from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict

from app.api.routers.models import Message

logger = logging.getLogger("uvicorn")


class NextQuestionSettings(BaseSettings):
enable: bool = True
prompt_template: str = (
"You're a helpful assistant! Your task is to suggest the next question that user might ask. "
"\nHere is the conversation history"
"\n---------------------\n{conversation}\n---------------------"
"Given the conversation history, please give me 3 questions that you might ask next!"
)

model_config = SettingsConfigDict(env_prefix="NEXT_QUESTION_")

@property
def prompt(self) -> PromptTemplate:
return PromptTemplate(self.prompt_template)


next_question_settings = NextQuestionSettings()


class NextQuestions(BaseModel):
"""A list of questions that user might ask next"""

questions: List[str]


class NextQuestionSuggestion:
@staticmethod
async def suggest_next_questions(
messages: List[Message],
) -> Optional[List[str]]:
"""
Suggest the next questions that user might ask based on the conversation history
Return None if suggestion is disabled or there is an error
"""
if not next_question_settings.enable:
return None

try:
# Reduce the cost by only using the last two messages
last_user_message = None
last_assistant_message = None
for message in reversed(messages):
if message.role == "user":
last_user_message = f"User: {message.content}"
elif message.role == "assistant":
last_assistant_message = f"Assistant: {message.content}"
if last_user_message and last_assistant_message:
break
conversation: str = f"{last_user_message}\n{last_assistant_message}"

output: NextQuestions = await Settings.llm.astructured_predict(
NextQuestions,
prompt=next_question_settings.prompt,
conversation=conversation,
)

return output.questions
except Exception as e:
logger.error(f"Error when generating next question: {e}")
return None
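A minimal sketch of calling the helper above from an async context. It assumes the file is copied to app/api/services/suggestion.py and that Settings.llm has been configured by the app's startup code; the example messages are illustrative:

```python
import asyncio

from app.api.routers.models import Message
from app.api.services.suggestion import NextQuestionSuggestion  # path assumed


async def main() -> None:
    # If Settings.llm is not configured, the try/except above logs the error
    # and the helper returns None.
    conversation = [
        Message(role="user", content="How do I upload a private file?"),
        Message(role="assistant", content="Use the upload endpoint; the file is marked private."),
    ]
    questions = await NextQuestionSuggestion.suggest_next_questions(conversation)
    print(questions)  # e.g. a list of 3 follow-up questions, or None


asyncio.run(main())
```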
@@ -7,8 +7,9 @@
from fastapi import Request
from fastapi.responses import StreamingResponse

from app.api.routers.models import ChatData
from app.api.routers.models import ChatData, Message
from app.agents.single import AgentRunEvent, AgentRunResult
from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings

logger = logging.getLogger("uvicorn")

@@ -57,16 +58,32 @@ async def content_generator(
# Yield the text response
async def _chat_response_generator():
result = await task
final_response = ""

if isinstance(result, AgentRunResult):
for token in result.response.message.content:
yield VercelStreamResponse.convert_text(token)

if isinstance(result, AsyncGenerator):
async for token in result:
final_response += token.delta
yield VercelStreamResponse.convert_text(token.delta)

# TODO: stream NextQuestionSuggestion
# Generate questions that user might be interested in
if next_question_settings.enable:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)
# TODO: stream sources

# Yield the events from the event handler
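For context, the data event yielded above carries a plain {"type": "suggested_questions", "data": [...]} payload. A hypothetical consumer-side sketch follows (handle_data_chunk and the example payload are assumptions; decoding of the stream framing produced by convert_data is assumed to have happened already):

```python
# Hypothetical consumer-side handling, not part of this PR. `chunk` is assumed
# to be the already-decoded JSON payload produced by convert_data above.
def handle_data_chunk(chunk: dict) -> None:
    if chunk.get("type") == "suggested_questions":
        for question in chunk.get("data", []):
            print(f"Suggested: {question}")


# Example payload mirroring what the generator yields:
handle_data_chunk(
    {
        "type": "suggested_questions",
        "data": ["What file formats are supported?", "Can I delete an uploaded file?"],
    }
)
```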
1 change: 1 addition & 0 deletions templates/types/multiagent/fastapi/pyproject.toml
@@ -16,6 +16,7 @@ llama-index = "^0.11.4"
fastapi = "^0.112.2"
python-dotenv = "^1.0.0"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
aiostream = "^0.5.2"

@@ -7,7 +7,7 @@

from app.api.routers.events import EventCallbackHandler
from app.api.routers.models import ChatData, Message, SourceNodes
from app.api.services.suggestion import NextQuestionSuggestion
from app.api.services.suggestion import NextQuestionSuggestion, next_question_settings


class VercelStreamResponse(StreamingResponse):
@@ -56,20 +56,21 @@ async def _chat_response_generator():
final_response += token
yield VercelStreamResponse.convert_text(token)

# Generate questions that user might interested to
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if len(questions) > 0:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
# Generate questions that user might be interested in
if next_question_settings.enable:
conversation = chat_data.messages + [
Message(role="assistant", content=final_response)
]
questions = await NextQuestionSuggestion.suggest_next_questions(
conversation
)
if questions:
yield VercelStreamResponse.convert_data(
{
"type": "suggested_questions",
"data": questions,
}
)

# the text_generator is the leading stream, once it's finished, also finish the event stream
event_handler.is_done = True
3 changes: 2 additions & 1 deletion templates/types/streaming/fastapi/pyproject.toml
@@ -14,8 +14,9 @@ fastapi = "^0.109.1"
uvicorn = { extras = ["standard"], version = "^0.23.2" }
python-dotenv = "^1.0.0"
aiostream = "^0.5.2"
llama-index = "0.11.6"
pydantic-settings = "^2.4.0"
cachetools = "^5.3.3"
llama-index = "0.11.6"

[build-system]
requires = ["poetry-core"]