diff --git a/src/client.py b/src/client.py index cb9d3ad32..fd7e3d1db 100644 --- a/src/client.py +++ b/src/client.py @@ -21,7 +21,28 @@ class AsyncLlamaStackClientHolder(metaclass=Singleton): _lsc: Optional[AsyncLlamaStackClient] = None async def load(self, llama_stack_config: LlamaStackConfiguration) -> None: - """Retrieve Async Llama stack client according to configuration.""" + """ + Load and initialize the holder's AsyncLlamaStackClient according to the provided config. + + If `llama_stack_config.use_as_library_client` is set to True, a + library-mode client is created using + `llama_stack_config.library_client_config_path` and initialized before + being stored. + + Otherwise, a service-mode client is created using + `llama_stack_config.url` and optional `llama_stack_config.api_key`. + The created client is stored on the instance for later retrieval via + `get_client()`. + + Parameters: + llama_stack_config (LlamaStackConfiguration): Configuration that + selects client mode and provides either a library client config + path or service connection details (URL and optional API key). + + Raises: + ValueError: If `use_as_library_client` is True but + `library_client_config_path` is not set. + """ if llama_stack_config.use_as_library_client is True: if llama_stack_config.library_client_config_path is not None: logger.info("Using Llama stack as library client") @@ -47,7 +68,15 @@ async def load(self, llama_stack_config: LlamaStackConfiguration) -> None: ) def get_client(self) -> AsyncLlamaStackClient: - """Return an initialised AsyncLlamaStackClient.""" + """ + Get the initialized client held by this holder. + + Returns: + AsyncLlamaStackClient: The initialized client instance. + + Raises: + RuntimeError: If the client has not been initialized; call `load(...)` first. + """ if not self._lsc: raise RuntimeError( "AsyncLlamaStackClient has not been initialised. Ensure 'load(..)' has been called." 
diff --git a/src/configuration.py b/src/configuration.py index b177c0ce8..ebcd3bd7d 100644 --- a/src/configuration.py +++ b/src/configuration.py @@ -49,14 +49,23 @@ def __new__(cls, *args: Any, **kwargs: Any) -> "AppConfig": return cls._instance def __init__(self) -> None: - """Initialize the class instance.""" + """Initialize the class instance. + + Sets placeholders for the loaded configuration and lazily-created + runtime resources (conversation cache, quota limiters, and token usage + history). + """ self._configuration: Optional[Configuration] = None self._conversation_cache: Optional[Cache] = None self._quota_limiters: list[QuotaLimiter] = [] self._token_usage_history: Optional[TokenUsageHistory] = None def load_configuration(self, filename: str) -> None: - """Load configuration from YAML file.""" + """Load configuration from YAML file. + + Parameters: + filename (str): Path to the YAML configuration file to load. + """ with open(filename, encoding="utf-8") as fin: config_dict = yaml.safe_load(fin) config_dict = replace_env_vars(config_dict) @@ -64,51 +73,105 @@ def load_configuration(self, filename: str) -> None: self.init_from_dict(config_dict) def init_from_dict(self, config_dict: dict[Any, Any]) -> None: - """Initialize configuration from a dictionary.""" + """Initialize configuration from a dictionary. + + Parameters: + config_dict (dict[Any, Any]): Mapping of configuration values + (typically parsed from YAML) to construct a new Configuration + instance. The method sets the internal configuration to + Configuration(**config_dict) and clears any cached conversation + cache, quota limiters, and token usage history so they will be + reinitialized on next access. 
+ """ # clear cached values when configuration changes self._conversation_cache = None self._quota_limiters = [] + self._token_usage_history = None # now it is possible to re-read configuration self._configuration = Configuration(**config_dict) @property def configuration(self) -> Configuration: - """Return the whole configuration.""" + """Return the whole configuration. + + Returns: + Configuration: The loaded configuration object. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration @property def service_configuration(self) -> ServiceConfiguration: - """Return service configuration.""" + """Return service configuration. + + Returns: + ServiceConfiguration: The service configuration stored in the current configuration. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.service @property def llama_stack_configuration(self) -> LlamaStackConfiguration: - """Return Llama stack configuration.""" + """Return Llama stack configuration. + + Returns: + LlamaStackConfiguration: The configured Llama stack settings. + + Raises: + LogicError: If the application configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.llama_stack @property def user_data_collection_configuration(self) -> UserDataCollection: - """Return user data collection configuration.""" + """Return user data collection configuration. + + Returns: + UserDataCollection: The configured UserDataCollection object from + the loaded configuration. + + Raises: + LogicError: If the application configuration has not been loaded. 
+ """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.user_data_collection @property def mcp_servers(self) -> list[ModelContextProtocolServer]: - """Return model context protocol servers configuration.""" + """Return model context protocol servers configuration. + + Returns: + list[ModelContextProtocolServer]: The list of configured MCP servers. + + Raises: + LogicError: If the configuration is not loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.mcp_servers @property def authentication_configuration(self) -> AuthenticationConfiguration: - """Return authentication configuration.""" + """Return authentication configuration. + + Returns: + AuthenticationConfiguration: The authentication configuration from + the loaded application configuration. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") @@ -116,7 +179,16 @@ def authentication_configuration(self) -> AuthenticationConfiguration: @property def authorization_configuration(self) -> AuthorizationConfiguration: - """Return authorization configuration or default no-op configuration.""" + """Return authorization configuration or default no-op configuration. + + Returns: + AuthorizationConfiguration: The configured authorization settings, + or a default no-op AuthorizationConfiguration when none is + configured. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") @@ -127,42 +199,87 @@ def authorization_configuration(self) -> AuthorizationConfiguration: @property def customization(self) -> Optional[Customization]: - """Return customization configuration.""" + """Return customization configuration. 
+ + Returns: + customization (Optional[Customization]): The customization + configuration if present, otherwise None. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.customization @property def inference(self) -> InferenceConfiguration: - """Return inference configuration.""" + """Return inference configuration. + + Returns: + InferenceConfiguration: The inference configuration from the loaded + application configuration. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.inference @property def conversation_cache_configuration(self) -> ConversationHistoryConfiguration: - """Return conversation cache configuration.""" + """Return conversation cache configuration. + + Returns: + ConversationHistoryConfiguration: The conversation cache + configuration from the loaded application configuration. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.conversation_cache @property def database_configuration(self) -> DatabaseConfiguration: - """Return database configuration.""" + """Return database configuration. + + Returns: + DatabaseConfiguration: The configured database settings. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.database @property def quota_handlers_configuration(self) -> QuotaHandlersConfiguration: - """Return quota handlers configuration.""" + """Return quota handlers configuration. + + Returns: + quota_handlers (QuotaHandlersConfiguration): The configured quota handlers. 
+ + Raises: + LogicError: If configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") return self._configuration.quota_handlers @property def conversation_cache(self) -> Cache: - """Return the conversation cache.""" + """Return the conversation cache. + + Returns: + Cache: The conversation cache instance configured by the loaded configuration. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") if self._conversation_cache is None: @@ -173,7 +290,14 @@ def conversation_cache(self) -> Cache: @property def quota_limiters(self) -> list[QuotaLimiter]: - """Return list of all setup quota limiters.""" + """Return list of all setup quota limiters. + + Returns: + list[QuotaLimiter]: The quota limiter instances configured for the application. + + Raises: + LogicError: If the configuration has not been loaded. + """ if self._configuration is None: raise LogicError("logic error: configuration is not loaded") if not self._quota_limiters: diff --git a/src/lightspeed_stack.py b/src/lightspeed_stack.py index 5cdb908e8..314b63824 100644 --- a/src/lightspeed_stack.py +++ b/src/lightspeed_stack.py @@ -25,7 +25,20 @@ def create_argument_parser() -> ArgumentParser: - """Create and configure argument parser object.""" + """Create and configure argument parser object. 
+ + The parser includes these options: + - -v / --verbose: enable verbose output + - -d / --dump-configuration: dump the loaded configuration to JSON and exit + - -c / --config: path to the configuration file (default "lightspeed-stack.yaml") + - -g / --generate-llama-stack-configuration: generate a Llama Stack + configuration from the service configuration + - -i / --input-config-file: Llama Stack input configuration filename (default "run.yaml") + - -o / --output-config-file: Llama Stack output configuration filename (default "run_.yaml") + + Returns: + Configured ArgumentParser for parsing the service CLI options. + """ parser = ArgumentParser() parser.add_argument( "-v", @@ -77,7 +90,23 @@ def create_argument_parser() -> ArgumentParser: def main() -> None: - """Entry point to the web service.""" + """Entry point to the web service. + + Start the Lightspeed Core Stack service process based on CLI flags and configuration. + + Parses command-line arguments, loads the configured settings, and then: + - If --dump-configuration is provided, writes the active configuration to + configuration.json and exits (exits with status 1 on failure). + - If --generate-llama-stack-configuration is provided, generates and stores + the Llama Stack configuration to the specified output file and exits + (exits with status 1 on failure). + - Otherwise, sets LIGHTSPEED_STACK_CONFIG_PATH for worker processes, starts + the quota scheduler, and starts the Uvicorn web service. + + Raises: + SystemExit: when configuration dumping or Llama Stack generation fails + (exits with status 1). 
+ """ logger.info("Lightspeed Core Stack startup") parser = create_argument_parser() args = parser.parse_args() diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py index a4ce37269..ca56fa452 100644 --- a/src/llama_stack_configuration.py +++ b/src/llama_stack_configuration.py @@ -16,7 +16,17 @@ class YamlDumper(yaml.Dumper): """Custom YAML dumper with proper indentation levels.""" def increase_indent(self, flow: bool = False, indentless: bool = False) -> None: - """Control the indentation level of formatted YAML output.""" + """Control the indentation level of formatted YAML output. + + Force block-style indentation for emitted YAML by ensuring the dumper + never uses "indentless" indentation. + + Parameters: + flow (bool): Whether the YAML flow style is being used; forwarded + to the base implementation. + indentless (bool): Ignored — this implementation always enforces + indented block style. + """ _ = indentless return super().increase_indent(flow, False) @@ -24,7 +34,24 @@ def increase_indent(self, flow: bool = False, indentless: bool = False) -> None: def generate_configuration( input_file: str, output_file: str, config: Configuration ) -> None: - """Generate new Llama Stack configuration.""" + """Generate new Llama Stack configuration. + + Update a Llama Stack YAML configuration file by inserting BYOK RAG vector + DB and provider entries when present. + + Reads the YAML configuration from `input_file`, and if `config.byok_rag` + contains items, updates or creates the `vector_dbs` and + `providers.vector_io` sections (preserving any existing entries) based on + that BYOK RAG data, then writes the resulting configuration to + `output_file`. If `config.byok_rag` is empty, the input configuration is + written unchanged to `output_file`. + + Parameters: + input_file (str): Path to the existing Llama Stack YAML configuration to read. + output_file (str): Path where the updated YAML configuration will be written. 
+ config (Configuration): Configuration object whose `byok_rag` list + supplies BYOK RAG entries to be added. + """ logger.info("Reading Llama Stack configuration from file %s", input_file) with open(input_file, "r", encoding="utf-8") as file: @@ -52,7 +79,24 @@ def generate_configuration( def construct_vector_dbs_section( ls_config: dict[str, Any], byok_rag: list[ByokRag] ) -> list[dict[str, Any]]: - """Construct vector_dbs section in Llama Stack configuration file.""" + """Construct vector_dbs section in Llama Stack configuration file. + + Builds the vector_dbs section for a Llama Stack configuration. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration mapping + used as the base; existing `vector_dbs` entries are preserved if + present. + byok_rag (list[ByokRag]): List of BYOK RAG definitions to be added to + the `vector_dbs` section. + + Returns: + list[dict[str, Any]]: The `vector_dbs` list where each entry is a mapping with keys: + - `vector_db_id`: identifier of the vector database + - `provider_id`: provider identifier prefixed with `"byok_"` + - `embedding_model`: name of the embedding model + - `embedding_dimension`: embedding vector dimensionality + """ output = [] # fill-in existing vector_dbs entries @@ -80,7 +124,27 @@ def construct_vector_dbs_section( def construct_vector_io_providers_section( ls_config: dict[str, Any], byok_rag: list[ByokRag] ) -> list[dict[str, Any]]: - """Construct providers/vector_io section in Llama Stack configuration file.""" + """Construct providers/vector_io section in Llama Stack configuration file. + + Builds the providers/vector_io list for a Llama Stack configuration by + preserving existing entries and appending providers derived from BYOK RAG + entries. + + Parameters: + ls_config (dict[str, Any]): Existing Llama Stack configuration + dictionary; if it contains providers.vector_io, those entries are used + as the starting list. 
+ byok_rag (list[ByokRag]): List of BYOK RAG specifications to convert + into provider entries. + + Returns: + list[dict[str, Any]]: The resulting providers/vector_io list containing + the original entries (if any) plus one entry per item in `byok_rag`. + Each appended entry has `provider_id` set to "byok_{vector_db_id}", + `provider_type` set from the RAG item, and a `config` with a `kvstore` + pointing to ".llama/{vector_db_id}.db", `namespace` as None, and `type` + "sqlite". + """ output = [] # fill-in existing vector_io entries diff --git a/src/quota/token_usage_history.py b/src/quota/token_usage_history.py index 6ffa658c1..591058fe5 100644 --- a/src/quota/token_usage_history.py +++ b/src/quota/token_usage_history.py @@ -7,6 +7,8 @@ import sqlite3 from datetime import datetime +from typing import Any + import psycopg2 from log import get_logger @@ -19,7 +21,11 @@ CONSUME_TOKENS_FOR_USER_SQLITE, ) -from models.config import QuotaHandlersConfiguration +from models.config import ( + QuotaHandlersConfiguration, + SQLiteDatabaseConfiguration, + PostgreSQLDatabaseConfiguration, +) from utils.connection_decorator import connection logger = get_logger(__name__) @@ -44,9 +50,13 @@ def __init__(self, configuration: QuotaHandlersConfiguration) -> None: """ # store the configuration, it will be used # by reconnection logic later, if needed - self.sqlite_connection_config = configuration.sqlite - self.postgres_connection_config = configuration.postgres - self.connection = None + self.sqlite_connection_config: SQLiteDatabaseConfiguration | None = ( + configuration.sqlite + ) + self.postgres_connection_config: PostgreSQLDatabaseConfiguration | None = ( + configuration.postgres + ) + self.connection: Any | None = None # initialize connection to DB self.connect()