perf: remove 'always_read_redis' - adding +830ms on each llm call (#6414)

* perf: remove 'always_read_redis' - adding +830ms on each llm call

* test: cleanup codestral tests - backend api unavailable
krrishdholakia authored Oct 25, 2024
1 parent 0f0470f commit d59f8f9
Showing 5 changed files with 49 additions and 57 deletions.
3 changes: 0 additions & 3 deletions litellm/__init__.py
@@ -160,9 +160,6 @@
 caching: bool = (
     False  # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
-always_read_redis: bool = (
-    True  # always use redis for rate limiting logic on litellm proxy
-)
 caching_with_models: bool = (
     False  # # Not used anymore, will be removed in next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
 )
10 changes: 2 additions & 8 deletions litellm/caching/dual_cache.py
@@ -32,7 +32,6 @@ def __init__(
         redis_cache: Optional[RedisCache] = None,
         default_in_memory_ttl: Optional[float] = None,
         default_redis_ttl: Optional[float] = None,
-        always_read_redis: Optional[bool] = True,
     ) -> None:
         super().__init__()
         # If in_memory_cache is not provided, use the default InMemoryCache
@@ -44,7 +43,6 @@ def __init__(
             default_in_memory_ttl or litellm.default_in_memory_ttl
         )
         self.default_redis_ttl = default_redis_ttl or litellm.default_redis_ttl
-        self.always_read_redis = always_read_redis

     def update_cache_ttl(
         self, default_in_memory_ttl: Optional[float], default_redis_ttl: Optional[float]
@@ -102,12 +100,8 @@ def get_cache(self, key, local_only: bool = False, **kwargs):
         if in_memory_result is not None:
             result = in_memory_result

-        if (
-            (self.always_read_redis is True)
-            and self.redis_cache is not None
-            and local_only is False
-        ):
-            # If not found in in-memory cache or always_read_redis is True, try fetching from Redis
+        if result is None and self.redis_cache is not None and local_only is False:
+            # If not found in in-memory cache, try fetching from Redis
             redis_result = self.redis_cache.get_cache(key, **kwargs)

             if redis_result is not None:
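To make the change concrete, below is a minimal, hypothetical sketch (not litellm's actual DualCache code) of the memory-first read pattern the new branch follows: return an in-memory hit immediately and only fall back to Redis on a miss, so the per-read Redis round-trip the commit title attributes ~830ms to is no longer paid on every call.

    # Hypothetical sketch of a memory-first dual cache read; all names here are illustrative only.
    from typing import Any, Optional


    class MemoryFirstCache:
        def __init__(self, redis_client: Optional[Any] = None) -> None:
            self._memory: dict = {}
            # redis_client is assumed to expose get(key); it is optional, like redis_cache above.
            self._redis = redis_client

        def get(self, key: str) -> Optional[Any]:
            # 1. Check the in-memory cache first; a hit costs no network round-trip.
            value = self._memory.get(key)
            if value is not None:
                return value
            # 2. Only on a miss, fall back to Redis (the step the old always_read_redis flag
            #    forced on every read).
            if self._redis is not None:
                value = self._redis.get(key)
                if value is not None:
                    # Backfill memory so the next read for this key stays local.
                    self._memory[key] = value
            return value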
2 changes: 1 addition & 1 deletion litellm/integrations/opentelemetry.py
@@ -171,7 +171,7 @@ async def async_service_success_hook(
                 try:
                     value = str(value)
                 except Exception:
-                    value = "litllm logging error - could_not_json_serialize"
+                    value = "litellm logging error - could_not_json_serialize"
                 self.safe_set_attribute(
                     span=service_logging_span,
                     key=key,
51 changes: 46 additions & 5 deletions litellm/proxy/_new_secret_config.yaml
@@ -1,10 +1,51 @@
 model_list:
   - model_name: gpt-4o
     litellm_params:
-      model: azure/gpt-4o-realtime-preview
-      api_key: os.environ/AZURE_SWEDEN_API_KEY
-      api_base: os.environ/AZURE_SWEDEN_API_BASE
+      model: openai/fake
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app/

 litellm_settings:
-  success_callback: ["langfuse"]
-  # logged_real_time_event_types: "*"
+  callbacks: ["prometheus", "otel"]
+
+general_settings:
+  user_api_key_cache_ttl: 3600
+
+router_settings:
+  routing_strategy: latency-based-routing
+  routing_strategy_args:
+    # only assign 40% of traffic to the fastest deployment to avoid overloading it
+    lowest_latency_buffer: 0.4
+
+    # consider last five minutes of calls for latency calculation
+    ttl: 300
+
+  # model_group_alias:
+  #   gpt-4o: gpt-4o-128k-2024-05-13
+  #   gpt-4o-mini: gpt-4o-mini-128k-2024-07-18
+
+  enable_tag_filtering: True
+
+  # retry call 3 times on each model_name (we don't use fallbacks, so this would be 3 times total)
+  num_retries: 3
+
+  # -- cooldown settings --
+  # see https://github.com/BerriAI/litellm/blob/main/litellm/router_utils/cooldown_handlers.py#L265
+
+  # cooldown model if it fails > n calls in a minute.
+  allowed_fails: 2
+
+  # (in seconds) how long to cooldown model if fails/min > allowed_fails
+  cooldown_time: 60
+
+  allowed_fails_policy:
+    InternalServerErrorAllowedFails: 1
+    RateLimitErrorAllowedFails: 2
+    TimeoutErrorAllowedFails: 3
+  # -- end cooldown settings --
+
+  # see https://docs.litellm.ai/docs/proxy/prod#3-use-redis-porthost-password-not-redis_url
+  redis_host: os.environ/REDIS_HOST
+  redis_port: os.environ/REDIS_PORT
+  redis_password: os.environ/REDIS_PASSWORD
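For readers unfamiliar with the os.environ/ values in the config above: the litellm proxy config uses that prefix to mean "read this value from an environment variable at load time", so secrets such as REDIS_PASSWORD never need to be written into the file. A rough, hypothetical sketch of the idea (not litellm's actual config loader):

    # Hypothetical illustration of the "os.environ/<VAR_NAME>" convention used in the YAML above.
    import os


    def resolve_config_value(value: str) -> str:
        """Return the environment variable's contents when given 'os.environ/<VAR_NAME>'."""
        prefix = "os.environ/"
        if isinstance(value, str) and value.startswith(prefix):
            return os.environ.get(value[len(prefix):], "")
        return value


    # e.g. resolve_config_value("os.environ/REDIS_HOST") returns the contents of $REDIS_HOST.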

40 changes: 0 additions & 40 deletions tests/local_testing/test_caching.py
@@ -2066,46 +2066,6 @@ async def test_cache_default_off_acompletion():
     assert response3.id == response4.id


-@pytest.mark.asyncio()
-@pytest.mark.skip(reason="dual caching should first prioritze local cache")
-async def test_dual_cache_uses_redis():
-    """
-    - Store diff values in redis and in memory cache
-    - call get cache
-    - Assert that value from redis is used
-    """
-    litellm.set_verbose = True
-    from litellm.caching.caching import DualCache, RedisCache
-
-    current_usage = uuid.uuid4()
-
-    _cache_obj = DualCache(redis_cache=RedisCache(), always_read_redis=True)
-
-    # set cache
-    await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)
-
-    # modify value of in memory cache
-    _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1
-
-    # get cache
-    value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
-    print("value from dual cache", value)
-    assert value == 10
-
-
-@pytest.mark.asyncio()
-async def test_proxy_logging_setup():
-    """
-    Assert always_read_redis is True when used by internal usage cache
-    """
-    from litellm.caching.caching import DualCache
-    from litellm.proxy.utils import ProxyLogging
-
-    pl_obj = ProxyLogging(user_api_key_cache=DualCache())
-    assert pl_obj.internal_usage_cache.dual_cache.always_read_redis is True
-
-
 @pytest.mark.skip(reason="local test. Requires sentinel setup.")
 @pytest.mark.asyncio
 async def test_redis_sentinel_caching():
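With always_read_redis gone, the deleted test's assertion effectively inverts: when the two layers disagree, the in-memory value should win. A hypothetical counterpart test, sketched from the removed one (it still assumes a reachable local Redis, as the original did, and is not part of this commit):

    import uuid

    import pytest


    @pytest.mark.asyncio()
    async def test_dual_cache_prefers_in_memory():
        """
        - Store different values in redis and in the in-memory cache
        - call get cache
        - Assert that the in-memory value is used (no forced redis read)
        """
        from litellm.caching.caching import DualCache, RedisCache

        current_usage = uuid.uuid4()

        _cache_obj = DualCache(redis_cache=RedisCache())

        # set cache (writes to both layers)
        await _cache_obj.async_set_cache(key=f"current_usage: {current_usage}", value=10)

        # overwrite the in-memory copy only
        _cache_obj.in_memory_cache.cache_dict[f"current_usage: {current_usage}"] = 1

        # get cache - the local value should be returned without consulting redis
        value = await _cache_obj.async_get_cache(key=f"current_usage: {current_usage}")
        assert value == 1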
