Skip to content

Commit

Permalink
add litellm caching
Browse files Browse the repository at this point in the history
  • Loading branch information
SmartManoj committed Aug 24, 2024
1 parent 840802e commit 092f007
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 0 deletions.
2 changes: 2 additions & 0 deletions openhands/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class LLMConfig:
output_cost_per_token: The cost per output token. This will be available in logs for the user to check.
ollama_base_url: The base URL for the OLLAMA API.
drop_params: Drop any unmapped (unsupported) params without causing an exception.
enable_cache: Whether to enable caching.
"""

model: str = 'gpt-4o'
Expand Down Expand Up @@ -80,6 +81,7 @@ class LLMConfig:
ollama_base_url: str | None = None
message_summary_trunc_tokens_frac: float = 0.75
drop_params: bool | None = None
enable_cache: bool = True

def defaults_to_dict(self) -> dict:
"""Serialize fields to a dict for the frontend, including type hints, defaults, and whether it's optional."""
Expand Down
3 changes: 3 additions & 0 deletions openhands/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import litellm
from litellm import completion as litellm_completion
from litellm import completion_cost as litellm_completion_cost
from litellm.caching import Cache
from litellm.exceptions import (
APIConnectionError,
ContentPolicyViolationError,
Expand All @@ -38,6 +39,7 @@
from openhands.core.metrics import Metrics

message_separator = '\n\n----------\n\n'
litellm.cache = Cache()


class LLM(CondenserMixin):
Expand Down Expand Up @@ -124,6 +126,7 @@ def __init__(
timeout=self.config.timeout,
temperature=self.config.temperature,
top_p=self.config.top_p,
caching=self.config.enable_cache,
)

def attempt_on_error(retry_state):
Expand Down

0 comments on commit 092f007

Please sign in to comment.