Skip to content

Commit 2cc5e93

Browse files
authored
Merge pull request #187 from manstis/LCORE-323
LCORE-323: LlamaStackClient should be a singleton
2 parents d678ede + dacd970 commit 2cc5e93

File tree

14 files changed

+156
-127
lines changed

14 files changed

+156
-127
lines changed

src/app/endpoints/health.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,8 +11,7 @@
1111
from llama_stack.providers.datatypes import HealthStatus
1212

1313
from fastapi import APIRouter, status, Response
14-
from client import get_llama_stack_client
15-
from configuration import configuration
14+
from client import LlamaStackClientHolder
1615
from models.responses import (
1716
LivenessResponse,
1817
ReadinessResponse,
@@ -30,9 +29,7 @@ def get_providers_health_statuses() -> list[ProviderHealthStatus]:
3029
List of provider health statuses.
3130
"""
3231
try:
33-
llama_stack_config = configuration.llama_stack_configuration
34-
35-
client = get_llama_stack_client(llama_stack_config)
32+
client = LlamaStackClientHolder().get_client()
3633

3734
providers = client.providers.list()
3835
logger.debug("Found %d providers", len(providers))

src/app/endpoints/models.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from llama_stack_client import APIConnectionError
77
from fastapi import APIRouter, HTTPException, Request, status
88

9-
from client import get_llama_stack_client
9+
from client import LlamaStackClientHolder
1010
from configuration import configuration
1111
from models.responses import ModelsResponse
1212
from utils.endpoints import check_configuration_loaded
@@ -52,11 +52,12 @@ def models_endpoint_handler(_request: Request) -> ModelsResponse:
5252

5353
try:
5454
# try to get Llama Stack client
55-
client = get_llama_stack_client(llama_stack_configuration)
55+
client = LlamaStackClientHolder().get_client()
5656
# retrieve models
5757
models = client.models.list()
5858
m = [dict(m) for m in models]
5959
return ModelsResponse(models=m)
60+
6061
# connection to Llama Stack server
6162
except APIConnectionError as e:
6263
logger.error("Unable to connect to Llama Stack: %s", e)

src/app/endpoints/query.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121

2222
from fastapi import APIRouter, HTTPException, status, Depends
2323

24-
from client import get_llama_stack_client
24+
from client import LlamaStackClientHolder
2525
from configuration import configuration
2626
from models.responses import QueryResponse
2727
from models.requests import QueryRequest, Attachment
@@ -104,7 +104,7 @@ def query_endpoint_handler(
104104

105105
try:
106106
# try to get Llama Stack client
107-
client = get_llama_stack_client(llama_stack_config)
107+
client = LlamaStackClientHolder().get_client()
108108
model_id = select_model_id(client.models.list(), query_request)
109109
response, conversation_id = retrieve_response(
110110
client,
@@ -130,6 +130,7 @@ def query_endpoint_handler(
130130
)
131131

132132
return QueryResponse(conversation_id=conversation_id, response=response)
133+
133134
# connection to Llama Stack server
134135
except APIConnectionError as e:
135136
logger.error("Unable to connect to Llama Stack: %s", e)

src/app/endpoints/streaming_query.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from fastapi.responses import StreamingResponse
1818

1919
from auth import get_auth_dependency
20-
from client import get_async_llama_stack_client
20+
from client import AsyncLlamaStackClientHolder
2121
from configuration import configuration
2222
from models.requests import QueryRequest
2323
from utils.endpoints import check_configuration_loaded, get_system_prompt
@@ -197,7 +197,7 @@ async def streaming_query_endpoint_handler(
197197

198198
try:
199199
# try to get Llama Stack client
200-
client = await get_async_llama_stack_client(llama_stack_config)
200+
client = AsyncLlamaStackClientHolder().get_client()
201201
model_id = select_model_id(await client.models.list(), query_request)
202202
response, conversation_id = await retrieve_response(
203203
client,

src/client.py

Lines changed: 71 additions & 40 deletions
Original file line number | Diff line number | Diff line change
@@ -2,55 +2,86 @@
22

33
import logging
44

5+
from typing import Optional
6+
57
from llama_stack.distribution.library_client import (
68
AsyncLlamaStackAsLibraryClient, # type: ignore
79
LlamaStackAsLibraryClient, # type: ignore
810
)
911
from llama_stack_client import AsyncLlamaStackClient, LlamaStackClient # type: ignore
1012
from models.config import LLamaStackConfiguration
13+
from utils.types import Singleton
14+
1115

1216
logger = logging.getLogger(__name__)
1317

1418

15-
def get_llama_stack_client(
16-
llama_stack_config: LLamaStackConfiguration,
17-
) -> LlamaStackClient:
18-
"""Retrieve Llama stack client according to configuration."""
19-
if llama_stack_config.use_as_library_client is True:
20-
if llama_stack_config.library_client_config_path is not None:
21-
logger.info("Using Llama stack as library client")
22-
client = LlamaStackAsLibraryClient(
23-
llama_stack_config.library_client_config_path
19+
class LlamaStackClientHolder(metaclass=Singleton):
20+
"""Container for an initialised LlamaStackClient."""
21+
22+
_lsc: Optional[LlamaStackClient] = None
23+
24+
def load(self, llama_stack_config: LLamaStackConfiguration) -> None:
25+
"""Retrieve Llama stack client according to configuration."""
26+
if llama_stack_config.use_as_library_client is True:
27+
if llama_stack_config.library_client_config_path is not None:
28+
logger.info("Using Llama stack as library client")
29+
client = LlamaStackAsLibraryClient(
30+
llama_stack_config.library_client_config_path
31+
)
32+
client.initialize()
33+
self._lsc = client
34+
else:
35+
msg = "Configuration problem: library_client_config_path option is not set"
36+
logger.error(msg)
37+
# tisnik: use custom exception there - with cause etc.
38+
raise ValueError(msg)
39+
40+
else:
41+
logger.info("Using Llama stack running as a service")
42+
self._lsc = LlamaStackClient(
43+
base_url=llama_stack_config.url, api_key=llama_stack_config.api_key
44+
)
45+
46+
def get_client(self) -> LlamaStackClient:
47+
"""Return an initialised LlamaStackClient."""
48+
if not self._lsc:
49+
raise RuntimeError(
50+
"LlamaStackClient has not been initialised. Ensure 'load(..)' has been called."
2451
)
25-
client.initialize()
26-
return client
27-
msg = "Configuration problem: library_client_config_path option is not set"
28-
logger.error(msg)
29-
# tisnik: use custom exception there - with cause etc.
30-
raise Exception(msg) # pylint: disable=broad-exception-raised
31-
logger.info("Using Llama stack running as a service")
32-
return LlamaStackClient(
33-
base_url=llama_stack_config.url, api_key=llama_stack_config.api_key
34-
)
35-
36-
37-
async def get_async_llama_stack_client(
38-
llama_stack_config: LLamaStackConfiguration,
39-
) -> AsyncLlamaStackClient:
40-
"""Retrieve Async Llama stack client according to configuration."""
41-
if llama_stack_config.use_as_library_client is True:
42-
if llama_stack_config.library_client_config_path is not None:
43-
logger.info("Using Llama stack as library client")
44-
client = AsyncLlamaStackAsLibraryClient(
45-
llama_stack_config.library_client_config_path
52+
return self._lsc
53+
54+
55+
class AsyncLlamaStackClientHolder(metaclass=Singleton):
56+
"""Container for an initialised AsyncLlamaStackClient."""
57+
58+
_lsc: Optional[AsyncLlamaStackClient] = None
59+
60+
async def load(self, llama_stack_config: LLamaStackConfiguration) -> None:
61+
"""Retrieve Async Llama stack client according to configuration."""
62+
if llama_stack_config.use_as_library_client is True:
63+
if llama_stack_config.library_client_config_path is not None:
64+
logger.info("Using Llama stack as library client")
65+
client = AsyncLlamaStackAsLibraryClient(
66+
llama_stack_config.library_client_config_path
67+
)
68+
await client.initialize()
69+
self._lsc = client
70+
else:
71+
msg = "Configuration problem: library_client_config_path option is not set"
72+
logger.error(msg)
73+
# tisnik: use custom exception there - with cause etc.
74+
raise ValueError(msg)
75+
else:
76+
logger.info("Using Llama stack running as a service")
77+
self._lsc = AsyncLlamaStackClient(
78+
base_url=llama_stack_config.url, api_key=llama_stack_config.api_key
79+
)
80+
81+
def get_client(self) -> AsyncLlamaStackClient:
82+
"""Return an initialised AsyncLlamaStackClient."""
83+
if not self._lsc:
84+
raise RuntimeError(
85+
"AsyncLlamaStackClient has not been initialised. Ensure 'load(..)' has been called."
4686
)
47-
await client.initialize()
48-
return client
49-
msg = "Configuration problem: library_client_config_path option is not set"
50-
logger.error(msg)
51-
# tisnik: use custom exception there - with cause etc.
52-
raise Exception(msg) # pylint: disable=broad-exception-raised
53-
logger.info("Using Llama stack running as a service")
54-
return AsyncLlamaStackClient(
55-
base_url=llama_stack_config.url, api_key=llama_stack_config.api_key
56-
)
87+
return self._lsc

src/lightspeed_stack.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@
55
"""
66

77
from argparse import ArgumentParser
8+
import asyncio
89
import logging
9-
1010
from rich.logging import RichHandler
1111

1212
from runners.uvicorn import start_uvicorn
1313
from configuration import configuration
14-
14+
from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
1515

1616
FORMAT = "%(message)s"
1717
logging.basicConfig(
@@ -61,6 +61,12 @@ def main() -> None:
6161
logger.info(
6262
"Llama stack configuration: %s", configuration.llama_stack_configuration
6363
)
64+
logger.info("Creating LlamaStackClient")
65+
LlamaStackClientHolder().load(configuration.configuration.llama_stack)
66+
logger.info("Creating AsyncLlamaStackClient")
67+
asyncio.run(
68+
AsyncLlamaStackClientHolder().load(configuration.configuration.llama_stack)
69+
)
6470

6571
if args.dump_configuration:
6672
configuration.configuration.dump()

src/utils/common.py

Lines changed: 8 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3,14 +3,13 @@
33
from typing import Any, List, cast
44
from logging import Logger
55

6-
from llama_stack_client import LlamaStackClient
6+
from llama_stack_client import LlamaStackClient, AsyncLlamaStackClient
77

88
from llama_stack.distribution.library_client import (
9-
LlamaStackAsLibraryClient,
109
AsyncLlamaStackAsLibraryClient,
1110
)
1211

13-
from client import get_llama_stack_client
12+
from client import LlamaStackClientHolder, AsyncLlamaStackClientHolder
1413
from models.config import Configuration, ModelContextProtocolServer
1514

1615

@@ -39,24 +38,19 @@ async def register_mcp_servers_async(
3938

4039
if configuration.llama_stack.use_as_library_client:
4140
# Library client - use async interface
42-
# config.py validation ensures library_client_config_path is not None
43-
# when use_as_library_client is True
44-
config_path = cast(str, configuration.llama_stack.library_client_config_path)
45-
client = LlamaStackAsLibraryClient(config_path)
46-
await client.async_client.initialize()
47-
48-
await _register_mcp_toolgroups_async(
49-
client.async_client, configuration.mcp_servers, logger
41+
client = cast(
42+
AsyncLlamaStackAsLibraryClient, AsyncLlamaStackClientHolder().get_client()
5043
)
44+
await client.initialize()
45+
await _register_mcp_toolgroups_async(client, configuration.mcp_servers, logger)
5146
else:
5247
# Service client - use sync interface
53-
client = get_llama_stack_client(configuration.llama_stack)
54-
48+
client = LlamaStackClientHolder().get_client()
5549
_register_mcp_toolgroups_sync(client, configuration.mcp_servers, logger)
5650

5751

5852
async def _register_mcp_toolgroups_async(
59-
client: AsyncLlamaStackAsLibraryClient,
53+
client: AsyncLlamaStackClient,
6054
mcp_servers: List[ModelContextProtocolServer],
6155
logger: Logger,
6256
) -> None:

src/utils/types.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,13 @@
1+
"""Common types for the project."""
2+
3+
4+
class Singleton(type):
5+
"""Metaclass for Singleton support."""
6+
7+
_instances = {} # type: ignore
8+
9+
def __call__(cls, *args, **kwargs): # type: ignore
10+
"""Ensure a single instance is created."""
11+
if cls not in cls._instances:
12+
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
13+
return cls._instances[cls]

tests/unit/app/endpoints/test_health.py

Lines changed: 4 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -98,14 +98,11 @@ class TestGetProvidersHealthStatuses:
9898
def test_get_providers_health_statuses(self, mocker):
9999
"""Test get_providers_health_statuses with healthy providers."""
100100
# Mock the imports
101-
mock_get_llama_stack_client = mocker.patch(
102-
"app.endpoints.health.get_llama_stack_client"
103-
)
104-
mock_configuration = mocker.patch("app.endpoints.health.configuration")
101+
mock_lsc = mocker.patch("client.LlamaStackClientHolder.get_client")
105102

106103
# Mock the client and its methods
107104
mock_client = mocker.Mock()
108-
mock_get_llama_stack_client.return_value = mock_client
105+
mock_lsc.return_value = mock_client
109106

110107
# Mock providers.list() to return providers with health
111108
mock_provider_1 = mocker.Mock()
@@ -136,9 +133,6 @@ def test_get_providers_health_statuses(self, mocker):
136133
]
137134

138135
# Mock configuration
139-
mock_llama_stack_config = mocker.Mock()
140-
mock_configuration.llama_stack_configuration = mock_llama_stack_config
141-
142136
result = get_providers_health_statuses()
143137

144138
assert len(result) == 3
@@ -155,17 +149,10 @@ def test_get_providers_health_statuses(self, mocker):
155149
def test_get_providers_health_statuses_connection_error(self, mocker):
156150
"""Test get_providers_health_statuses when connection fails."""
157151
# Mock the imports
158-
mock_get_llama_stack_client = mocker.patch(
159-
"app.endpoints.health.get_llama_stack_client"
160-
)
161-
mock_configuration = mocker.patch("app.endpoints.health.configuration")
162-
163-
# Mock configuration
164-
mock_llama_stack_config = mocker.Mock()
165-
mock_configuration.llama_stack_configuration = mock_llama_stack_config
152+
mock_lsc = mocker.patch("client.LlamaStackClientHolder.get_client")
166153

167154
# Mock get_llama_stack_client to raise an exception
168-
mock_get_llama_stack_client.side_effect = Exception("Connection error")
155+
mock_lsc.side_effect = Exception("Connection error")
169156

170157
result = get_providers_health_statuses()
171158

tests/unit/app/endpoints/test_models.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -124,11 +124,10 @@ def test_models_endpoint_handler_unable_to_retrieve_models_list(mocker):
124124
# Mock the LlamaStack client
125125
mock_client = Mock()
126126
mock_client.models.list.return_value = []
127-
128-
# Mock the LlamaStack client (shouldn't be called directly)
129-
mocker.patch(
130-
"app.endpoints.models.get_llama_stack_client", return_value=mock_client
131-
)
127+
mock_lsc = mocker.patch("client.LlamaStackClientHolder.get_client")
128+
mock_lsc.return_value = mock_client
129+
mock_config = mocker.Mock()
130+
mocker.patch("app.endpoints.models.configuration", mock_config)
132131

133132
request = None
134133
response = models_endpoint_handler(request)

0 commit comments

Comments
 (0)