diff --git a/src/client.py b/src/client.py
index cb9d3ad32..fd7e3d1db 100644
--- a/src/client.py
+++ b/src/client.py
@@ -21,7 +21,28 @@ class AsyncLlamaStackClientHolder(metaclass=Singleton):
     _lsc: Optional[AsyncLlamaStackClient] = None
 
     async def load(self, llama_stack_config: LlamaStackConfiguration) -> None:
-        """Retrieve Async Llama stack client according to configuration."""
+        """
+        Load and initialize the holder's AsyncLlamaStackClient according to the provided config.
+
+        If `llama_stack_config.use_as_library_client` is set to True, a
+        library-mode client is created using
+        `llama_stack_config.library_client_config_path` and initialized before
+        being stored.
+
+        Otherwise, a service-mode client is created using
+        `llama_stack_config.url` and optional `llama_stack_config.api_key`.
+        The created client is stored on the instance for later retrieval via
+        `get_client()`.
+
+        Parameters:
+            llama_stack_config (LlamaStackConfiguration): Configuration that
+            selects client mode and provides either a library client config
+            path or service connection details (URL and optional API key).
+
+        Raises:
+            ValueError: If `use_as_library_client` is True but
+            `library_client_config_path` is not set.
+        """
         if llama_stack_config.use_as_library_client is True:
             if llama_stack_config.library_client_config_path is not None:
                 logger.info("Using Llama stack as library client")
@@ -47,7 +68,15 @@ async def load(self, llama_stack_config: LlamaStackConfiguration) -> None:
             )
 
     def get_client(self) -> AsyncLlamaStackClient:
-        """Return an initialised AsyncLlamaStackClient."""
+        """
+        Get the initialized client held by this holder.
+
+        Returns:
+            AsyncLlamaStackClient: The initialized client instance.
+
+        Raises:
+            RuntimeError: If the client has not been initialized; call `load(...)` first.
+        """
         if not self._lsc:
             raise RuntimeError(
                 "AsyncLlamaStackClient has not been initialised. Ensure 'load(..)' has been called."
diff --git a/src/configuration.py b/src/configuration.py
index b177c0ce8..ebcd3bd7d 100644
--- a/src/configuration.py
+++ b/src/configuration.py
@@ -49,14 +49,23 @@ def __new__(cls, *args: Any, **kwargs: Any) -> "AppConfig":
         return cls._instance
 
     def __init__(self) -> None:
-        """Initialize the class instance."""
+        """Initialize the class instance.
+
+        Sets placeholders for the loaded configuration and lazily-created
+        runtime resources (conversation cache, quota limiters, and token usage
+        history).
+        """
         self._configuration: Optional[Configuration] = None
         self._conversation_cache: Optional[Cache] = None
         self._quota_limiters: list[QuotaLimiter] = []
         self._token_usage_history: Optional[TokenUsageHistory] = None
 
     def load_configuration(self, filename: str) -> None:
-        """Load configuration from YAML file."""
+        """Load configuration from YAML file.
+
+        Parameters:
+            filename (str): Path to the YAML configuration file to load.
+        """
         with open(filename, encoding="utf-8") as fin:
             config_dict = yaml.safe_load(fin)
             config_dict = replace_env_vars(config_dict)
@@ -64,51 +73,105 @@ def load_configuration(self, filename: str) -> None:
             self.init_from_dict(config_dict)
 
     def init_from_dict(self, config_dict: dict[Any, Any]) -> None:
-        """Initialize configuration from a dictionary."""
+        """Initialize configuration from a dictionary.
+
+        Parameters:
+            config_dict (dict[Any, Any]): Mapping of configuration values
+            (typically parsed from YAML) to construct a new Configuration
+            instance. The method sets the internal configuration to
+            Configuration(**config_dict) and clears any cached conversation
+            cache, quota limiters, and token usage history so they will be
+            reinitialized on next access.
+        """
         # clear cached values when configuration changes
         self._conversation_cache = None
         self._quota_limiters = []
+        self._token_usage_history = None
         # now it is possible to re-read configuration
         self._configuration = Configuration(**config_dict)
 
     @property
     def configuration(self) -> Configuration:
-        """Return the whole configuration."""
+        """Return the whole configuration.
+
+        Returns:
+            Configuration: The loaded configuration object.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration
 
     @property
     def service_configuration(self) -> ServiceConfiguration:
-        """Return service configuration."""
+        """Return service configuration.
+
+        Returns:
+            ServiceConfiguration: The service configuration stored in the current configuration.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.service
 
     @property
     def llama_stack_configuration(self) -> LlamaStackConfiguration:
-        """Return Llama stack configuration."""
+        """Return Llama stack configuration.
+
+        Returns:
+            LlamaStackConfiguration: The configured Llama stack settings.
+
+        Raises:
+            LogicError: If the application configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.llama_stack
 
     @property
     def user_data_collection_configuration(self) -> UserDataCollection:
-        """Return user data collection configuration."""
+        """Return user data collection configuration.
+
+        Returns:
+            UserDataCollection: The configured UserDataCollection object from
+            the loaded configuration.
+
+        Raises:
+            LogicError: If the application configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.user_data_collection
 
     @property
     def mcp_servers(self) -> list[ModelContextProtocolServer]:
-        """Return model context protocol servers configuration."""
+        """Return model context protocol servers configuration.
+
+        Returns:
+            list[ModelContextProtocolServer]: The list of configured MCP servers.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.mcp_servers
 
     @property
     def authentication_configuration(self) -> AuthenticationConfiguration:
-        """Return authentication configuration."""
+        """Return authentication configuration.
+
+        Returns:
+            AuthenticationConfiguration: The authentication configuration from
+            the loaded application configuration.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
 
@@ -116,7 +179,16 @@ def authentication_configuration(self) -> AuthenticationConfiguration:
 
     @property
     def authorization_configuration(self) -> AuthorizationConfiguration:
-        """Return authorization configuration or default no-op configuration."""
+        """Return authorization configuration or default no-op configuration.
+
+        Returns:
+            AuthorizationConfiguration: The configured authorization settings,
+            or a default no-op AuthorizationConfiguration when none is
+            configured.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
 
@@ -127,42 +199,87 @@ def authorization_configuration(self) -> AuthorizationConfiguration:
 
     @property
     def customization(self) -> Optional[Customization]:
-        """Return customization configuration."""
+        """Return customization configuration.
+
+        Returns:
+            Optional[Customization]: The customization configuration if
+            present, otherwise None.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.customization
 
     @property
     def inference(self) -> InferenceConfiguration:
-        """Return inference configuration."""
+        """Return inference configuration.
+
+        Returns:
+            InferenceConfiguration: The inference configuration from the loaded
+            application configuration.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.inference
 
     @property
     def conversation_cache_configuration(self) -> ConversationHistoryConfiguration:
-        """Return conversation cache configuration."""
+        """Return conversation cache configuration.
+
+        Returns:
+            ConversationHistoryConfiguration: The conversation cache
+            configuration from the loaded application configuration.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.conversation_cache
 
     @property
     def database_configuration(self) -> DatabaseConfiguration:
-        """Return database configuration."""
+        """Return database configuration.
+
+        Returns:
+            DatabaseConfiguration: The configured database settings.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.database
 
     @property
     def quota_handlers_configuration(self) -> QuotaHandlersConfiguration:
-        """Return quota handlers configuration."""
+        """Return quota handlers configuration.
+
+        Returns:
+            QuotaHandlersConfiguration: The configured quota handlers.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         return self._configuration.quota_handlers
 
     @property
     def conversation_cache(self) -> Cache:
-        """Return the conversation cache."""
+        """Return the conversation cache.
+
+        Returns:
+            Cache: The conversation cache instance configured by the loaded configuration.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         if self._conversation_cache is None:
@@ -173,7 +290,14 @@ def conversation_cache(self) -> Cache:
 
     @property
     def quota_limiters(self) -> list[QuotaLimiter]:
-        """Return list of all setup quota limiters."""
+        """Return list of all configured quota limiters.
+
+        Returns:
+            list[QuotaLimiter]: The quota limiter instances configured for the application.
+
+        Raises:
+            LogicError: If the configuration has not been loaded.
+        """
         if self._configuration is None:
             raise LogicError("logic error: configuration is not loaded")
         if not self._quota_limiters:
diff --git a/src/lightspeed_stack.py b/src/lightspeed_stack.py
index 5cdb908e8..314b63824 100644
--- a/src/lightspeed_stack.py
+++ b/src/lightspeed_stack.py
@@ -25,7 +25,20 @@
 
 
 def create_argument_parser() -> ArgumentParser:
-    """Create and configure argument parser object."""
+    """Create and configure argument parser object.
+
+    The parser includes these options:
+    - -v / --verbose: enable verbose output
+    - -d / --dump-configuration: dump the loaded configuration to JSON and exit
+    - -c / --config: path to the configuration file (default "lightspeed-stack.yaml")
+    - -g / --generate-llama-stack-configuration: generate a Llama Stack
+                                                 configuration from the service configuration
+    - -i / --input-config-file: Llama Stack input configuration filename (default "run.yaml")
+    - -o / --output-config-file: Llama Stack output configuration filename (default "run_.yaml")
+
+    Returns:
+        Configured ArgumentParser for parsing the service CLI options.
+    """
     parser = ArgumentParser()
     parser.add_argument(
         "-v",
@@ -77,7 +90,23 @@ def create_argument_parser() -> ArgumentParser:
 
 
 def main() -> None:
-    """Entry point to the web service."""
+    """Entry point to the web service.
+
+    Start the Lightspeed Core Stack service process based on CLI flags and configuration.
+
+    Parses command-line arguments, loads the configured settings, and then:
+    - If --dump-configuration is provided, writes the active configuration to
+      configuration.json and exits (exits with status 1 on failure).
+    - If --generate-llama-stack-configuration is provided, generates and stores
+      the Llama Stack configuration to the specified output file and exits
+      (exits with status 1 on failure).
+    - Otherwise, sets LIGHTSPEED_STACK_CONFIG_PATH for worker processes, starts
+      the quota scheduler, and starts the Uvicorn web service.
+
+    Raises:
+        SystemExit: when configuration dumping or Llama Stack generation fails
+                    (exits with status 1).
+    """
     logger.info("Lightspeed Core Stack startup")
     parser = create_argument_parser()
     args = parser.parse_args()
diff --git a/src/llama_stack_configuration.py b/src/llama_stack_configuration.py
index a4ce37269..ca56fa452 100644
--- a/src/llama_stack_configuration.py
+++ b/src/llama_stack_configuration.py
@@ -16,7 +16,17 @@ class YamlDumper(yaml.Dumper):
     """Custom YAML dumper with proper indentation levels."""
 
     def increase_indent(self, flow: bool = False, indentless: bool = False) -> None:
-        """Control the indentation level of formatted YAML output."""
+        """Control the indentation level of formatted YAML output.
+
+        Force block-style indentation for emitted YAML by ensuring the dumper
+        never uses "indentless" indentation.
+
+        Parameters:
+            flow (bool): Whether the YAML flow style is being used; forwarded
+            to the base implementation.
+            indentless (bool): Ignored — this implementation always enforces
+            indented block style.
+        """
         _ = indentless
         return super().increase_indent(flow, False)
 
@@ -24,7 +34,24 @@ def increase_indent(self, flow: bool = False, indentless: bool = False) -> None:
 def generate_configuration(
     input_file: str, output_file: str, config: Configuration
 ) -> None:
-    """Generate new Llama Stack configuration."""
+    """Generate new Llama Stack configuration.
+
+    Update a Llama Stack YAML configuration file by inserting BYOK RAG vector
+    DB and provider entries when present.
+
+    Reads the YAML configuration from `input_file`, and if `config.byok_rag`
+    contains items, updates or creates the `vector_dbs` and
+    `providers.vector_io` sections (preserving any existing entries) based on
+    that BYOK RAG data, then writes the resulting configuration to
+    `output_file`. If `config.byok_rag` is empty, the input configuration is
+    written unchanged to `output_file`.
+
+    Parameters:
+        input_file (str): Path to the existing Llama Stack YAML configuration to read.
+        output_file (str): Path where the updated YAML configuration will be written.
+        config (Configuration): Configuration object whose `byok_rag` list
+        supplies BYOK RAG entries to be added.
+    """
     logger.info("Reading Llama Stack configuration from file %s", input_file)
 
     with open(input_file, "r", encoding="utf-8") as file:
@@ -52,7 +79,24 @@ def generate_configuration(
 def construct_vector_dbs_section(
     ls_config: dict[str, Any], byok_rag: list[ByokRag]
 ) -> list[dict[str, Any]]:
-    """Construct vector_dbs section in Llama Stack configuration file."""
+    """Construct vector_dbs section in Llama Stack configuration file.
+
+    Existing `vector_dbs` entries are kept and entries for `byok_rag` items are added.
+
+    Parameters:
+        ls_config (dict[str, Any]): Existing Llama Stack configuration mapping
+        used as the base; existing `vector_dbs` entries are preserved if
+        present.
+        byok_rag (list[ByokRag]): List of BYOK RAG definitions to be added to
+        the `vector_dbs` section.
+
+    Returns:
+        list[dict[str, Any]]: The `vector_dbs` list where each entry is a mapping with keys:
+            - `vector_db_id`: identifier of the vector database
+            - `provider_id`: provider identifier prefixed with `"byok_"`
+            - `embedding_model`: name of the embedding model
+            - `embedding_dimension`: embedding vector dimensionality
+    """
     output = []
 
     # fill-in existing vector_dbs entries
@@ -80,7 +124,27 @@ def construct_vector_dbs_section(
 def construct_vector_io_providers_section(
     ls_config: dict[str, Any], byok_rag: list[ByokRag]
 ) -> list[dict[str, Any]]:
-    """Construct providers/vector_io section in Llama Stack configuration file."""
+    """Construct providers/vector_io section in Llama Stack configuration file.
+
+    Builds the providers/vector_io list for a Llama Stack configuration by
+    preserving existing entries and appending providers derived from BYOK RAG
+    entries.
+
+    Parameters:
+        ls_config (dict[str, Any]): Existing Llama Stack configuration
+        dictionary; if it contains providers.vector_io, those entries are used
+        as the starting list.
+        byok_rag (list[ByokRag]): List of BYOK RAG specifications to convert
+        into provider entries.
+
+    Returns:
+        list[dict[str, Any]]: The resulting providers/vector_io list containing
+        the original entries (if any) plus one entry per item in `byok_rag`.
+        Each appended entry has `provider_id` set to "byok_<vector_db_id>",
+        `provider_type` set from the RAG item, and a `config` with a `kvstore`
+        pointing to ".llama/<vector_db_id>.db", `namespace` as None, and `type`
+        "sqlite".
+    """
     output = []
 
     # fill-in existing vector_io entries
diff --git a/src/quota/token_usage_history.py b/src/quota/token_usage_history.py
index 6ffa658c1..591058fe5 100644
--- a/src/quota/token_usage_history.py
+++ b/src/quota/token_usage_history.py
@@ -7,6 +7,8 @@
 
 import sqlite3
 from datetime import datetime
+from typing import Any
+
 import psycopg2
 
 from log import get_logger
@@ -19,7 +21,11 @@
     CONSUME_TOKENS_FOR_USER_SQLITE,
 )
 
-from models.config import QuotaHandlersConfiguration
+from models.config import (
+    QuotaHandlersConfiguration,
+    SQLiteDatabaseConfiguration,
+    PostgreSQLDatabaseConfiguration,
+)
 from utils.connection_decorator import connection
 
 logger = get_logger(__name__)
@@ -44,9 +50,13 @@ def __init__(self, configuration: QuotaHandlersConfiguration) -> None:
         """
         # store the configuration, it will be used
         # by reconnection logic later, if needed
-        self.sqlite_connection_config = configuration.sqlite
-        self.postgres_connection_config = configuration.postgres
-        self.connection = None
+        self.sqlite_connection_config: SQLiteDatabaseConfiguration | None = (
+            configuration.sqlite
+        )
+        self.postgres_connection_config: PostgreSQLDatabaseConfiguration | None = (
+            configuration.postgres
+        )
+        self.connection: Any | None = None
 
         # initialize connection to DB
         self.connect()