
Commit 1f626e5

Merge pull request #25 from tisnik/use-llama-stack-to-retrieve-llm-output
Use llama-stack to retrieve LLM output
2 parents: 2774db5 + fc1a86d

1 file changed: +28 −3 lines

src/app/endpoints/query.py

@@ -3,8 +3,11 @@
 import logging
 from typing import Any
 
+from llama_stack_client import LlamaStackClient
+
 from fastapi import APIRouter, Request
 
+from configuration import configuration
 from models.responses import QueryResponse
 
 logger = logging.getLogger(__name__)
@@ -19,6 +22,28 @@
 }
 
 
-@router.get("/query", responses=query_response)
-def info_endpoint_handler(request: Request) -> QueryResponse:
-    return QueryResponse(query="foo", response="bar")
+@router.post("/query", responses=query_response)
+def info_endpoint_handler(request: Request, query: str) -> QueryResponse:
+    llama_stack_config = configuration.llama_stack_configuration
+    logger.info("LLama stack config: %s", llama_stack_config)
+    client = LlamaStackClient(
+        base_url=llama_stack_config.url, api_key=llama_stack_config.api_key
+    )
+
+    # retrieve list of available models
+    models = client.models.list()
+
+    # select the first LLM
+    llm = next(m for m in models if m.model_type == "llm")
+    model_id = llm.identifier
+
+    logger.info("Model: %s", model_id)
+
+    response = client.inference.chat_completion(
+        model_id=model_id,
+        messages=[
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": query},
+        ],
+    )
+    return QueryResponse(query=query, response=str(response.completion_message.content))
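This change turns the stub GET handler into a POST endpoint that forwards the user's question to the first LLM registered with Llama Stack. Because `query: str` is a plain function parameter (not a Pydantic body model), FastAPI reads it from the URL query string. A minimal sketch of calling the new endpoint from Python; the host, port, and a running server are assumptions, not part of this commit:

import requests  # assumes the requests package is installed

# POST /query with the question passed as a query-string parameter,
# matching the `query: str` parameter of info_endpoint_handler.
resp = requests.post(
    "http://localhost:8080/query",  # assumed host/port of the running service
    params={"query": "Say hello"},
)
resp.raise_for_status()
print(resp.json())  # e.g. {"query": "Say hello", "response": "..."}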
