
Commit

Merge branch 'main' into litellm_slack_langfuse_alerting

krrishdholakia authored Feb 6, 2024
2 parents be81183 + db68b83 commit 9e9fb74

Showing 5 changed files with 80 additions and 26 deletions.
litellm/proxy/proxy_config.yaml (4 changes: 3 additions & 1 deletion)

@@ -78,7 +78,9 @@ litellm_settings:
type: "redis-semantic"
similarity_threshold: 0.8
redis_semantic_cache_embedding_model: azure-embedding-model
# cache: True
upperbound_key_generate_params:
max_budget: 100
duration: "30d"
# setting callback class
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
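
Judging by the name, the new upperbound_key_generate_params block caps the values a caller may request when generating a key through the proxy. A minimal sketch of how such a ceiling could be enforced, assuming a FastAPI-style handler; the helper names and the duration parsing below are illustrative, not code from this commit:

from fastapi import HTTPException

# Hypothetical upper bounds mirroring the YAML above.
UPPERBOUND_KEY_GENERATE_PARAMS = {"max_budget": 100, "duration": "30d"}


def duration_in_seconds(duration: str) -> int:
    # Illustrative parser: "30d" -> seconds; supports s/m/h/d suffixes only.
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    return int(duration[:-1]) * units[duration[-1]]


def enforce_upperbound_key_params(requested: dict) -> None:
    # Reject key-generation requests that ask for more than the configured ceiling.
    upper = UPPERBOUND_KEY_GENERATE_PARAMS
    if requested.get("max_budget") is not None and requested["max_budget"] > upper["max_budget"]:
        raise HTTPException(status_code=400, detail="max_budget exceeds the configured upper bound")
    if requested.get("duration") is not None and duration_in_seconds(
        requested["duration"]
    ) > duration_in_seconds(upper["duration"]):
        raise HTTPException(status_code=400, detail="duration exceeds the configured upper bound")


enforce_upperbound_key_params({"max_budget": 50, "duration": "7d"})  # within bounds, no error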

litellm/proxy/proxy_server.py (49 changes: 37 additions & 12 deletions)

@@ -1749,15 +1749,41 @@ async def async_data_generator(response, user_api_key_dict):
done_message = "[DONE]"
yield f"data: {done_message}\n\n"
except Exception as e:
yield f"data: {str(e)}\n\n"
traceback.print_exc()
await proxy_logging_obj.post_call_failure_hook(
user_api_key_dict=user_api_key_dict, original_exception=e
)
verbose_proxy_logger.debug(
f"\033[1;31mAn error occurred: {e}\n\n Debug this by setting `--debug`, e.g. `litellm --model gpt-3.5-turbo --debug`"
)
router_model_names = (
[m["model_name"] for m in llm_model_list]
if llm_model_list is not None
else []
)
if user_debug:
traceback.print_exc()

if isinstance(e, HTTPException):
raise e
else:
error_traceback = traceback.format_exc()
error_msg = f"{str(e)}\n\n{error_traceback}"

raise ProxyException(
message=getattr(e, "message", error_msg),
type=getattr(e, "type", "None"),
param=getattr(e, "param", "None"),
code=getattr(e, "status_code", 500),
)
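
Aside: the net effect of this change is that a mid-stream failure is no longer emitted as a plain data: line; it is logged, handed to the failure hook, and re-raised as a ProxyException that preserves the upstream status code. A stripped-down sketch of the same pattern (the ProxyException fields mirror the call above; everything else here is illustrative):

import asyncio
import traceback


class ProxyException(Exception):
    # Minimal stand-in carrying the fields the proxy surfaces to clients.
    def __init__(self, message: str, type: str, param: str, code: int):
        super().__init__(message)
        self.message, self.type, self.param, self.code = message, type, param, code


async def sse_stream(chunks):
    try:
        async for chunk in chunks:
            yield f"data: {chunk}\n\n"
        yield "data: [DONE]\n\n"
    except Exception as e:
        # Convert the failure into a structured exception instead of leaking it into the stream.
        traceback.print_exc()
        raise ProxyException(
            message=getattr(e, "message", str(e)),
            type=getattr(e, "type", "None"),
            param=getattr(e, "param", "None"),
            code=getattr(e, "status_code", 500),
        )


async def demo():
    async def tokens():
        yield "hello"
        yield "world"

    async for line in sse_stream(tokens()):
        print(line, end="")


asyncio.run(demo())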


def select_data_generator(response, user_api_key_dict):
    try:
        # since boto3 - sagemaker does not support async calls, we should use a sync data_generator
        if hasattr(
            response, "custom_llm_provider"
        ) and response.custom_llm_provider in ["sagemaker", "together_ai"]:
        ) and response.custom_llm_provider in ["sagemaker"]:
            return data_generator(
                response=response,
            )
@@ -2242,7 +2268,6 @@ async def chat_completion(
            selected_data_generator = select_data_generator(
                response=response, user_api_key_dict=user_api_key_dict
            )

            return StreamingResponse(
                selected_data_generator,
                media_type="text/event-stream",
@@ -4064,16 +4089,16 @@ async def health_readiness():
cache_type = {"type": cache_type, "index_info": index_info}

if prisma_client is not None: # if db passed in, check if it's connected
if prisma_client.db.is_connected() == True:
response_object = {"db": "connected"}
await prisma_client.health_check() # test the db connection
response_object = {"db": "connected"}

return {
"status": "healthy",
"db": "connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": litellm.success_callback,
}
return {
"status": "healthy",
"db": "connected",
"cache": cache_type,
"litellm_version": version,
"success_callbacks": litellm.success_callback,
}
else:
return {
"status": "healthy",
litellm/proxy/utils.py (17 changes: 15 additions & 2 deletions)

@@ -480,8 +480,6 @@ async def get_data(
        reset_at: Optional[datetime] = None,
    ):
        try:
            print_verbose("PrismaClient: get_data")

            response: Any = None
            if token is not None or (table_name is not None and table_name == "key"):
                # check if plain text or hash
@@ -893,6 +891,21 @@ async def disconnect(self):
            )
            raise e

    async def health_check(self):
        """
        Health check endpoint for the prisma client
        """
        sql_query = """
            SELECT 1
            FROM "LiteLLM_VerificationToken"
            LIMIT 1
            """

        # Execute a lightweight raw query to confirm the database connection is alive
        response = await self.db.query_raw(sql_query)
        return response
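
The readiness check in proxy_server.py above now leans on this method: a cheap SELECT 1 against an existing table proves both connectivity and schema access in one round trip. A hedged usage sketch (the endpoint path is invented, and prisma_client stands in for an initialized client exposing the health_check() coroutine above):

from fastapi import FastAPI
from fastapi.responses import JSONResponse

app = FastAPI()

# Assumed to be set up elsewhere; None means the proxy was started without a database.
prisma_client = None


@app.get("/demo/health/readiness")
async def demo_health_readiness():
    if prisma_client is None:
        return {"status": "healthy", "db": "Not connected"}
    try:
        await prisma_client.health_check()  # runs the SELECT 1 probe
        return {"status": "healthy", "db": "connected"}
    except Exception as e:
        # Fail readiness loudly if the database cannot be reached.
        return JSONResponse(status_code=503, content={"status": "unhealthy", "error": str(e)})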


class DBClient:
"""
litellm/tests/test_completion.py (2 changes: 1 addition & 1 deletion)

@@ -41,7 +41,7 @@ def test_completion_custom_provider_model_name():
            messages=messages,
            logger_fn=logger_fn,
        )
        # Add any assertions here to check the response
        print(response)
        print(response["choices"][0]["finish_reason"])
    except litellm.Timeout as e:
litellm/utils.py (34 changes: 24 additions & 10 deletions)

@@ -169,6 +169,8 @@ def map_finish_reason(
return "stop"
elif finish_reason == "SAFETY": # vertex ai
return "content_filter"
elif finish_reason == "STOP": # vertex ai
return "stop"
return finish_reason
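
Vertex AI reports normal termination as the upper-case string "STOP", which previously fell through the chain unmapped; with this addition it normalizes to OpenAI's "stop". A tiny standalone illustration of just these two branches (not the full function):

def map_vertex_finish_reason(finish_reason: str) -> str:
    # Normalize Vertex AI finish reasons to OpenAI-style values.
    if finish_reason == "SAFETY":
        return "content_filter"
    if finish_reason == "STOP":
        return "stop"
    return finish_reason


assert map_vertex_finish_reason("STOP") == "stop"
assert map_vertex_finish_reason("SAFETY") == "content_filter"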


@@ -1305,7 +1307,7 @@ def success_handler(
                )
                if callback == "langfuse":
                    global langFuseLogger
                    verbose_logger.debug("reaches langfuse for logging!")
                    verbose_logger.debug("reaches langfuse for success logging!")
                    kwargs = {}
                    for k, v in self.model_call_details.items():
                        if (
@@ -6716,7 +6718,13 @@ def exception_type(
message=f"VertexAIException - {error_str}",
model=model,
llm_provider="vertex_ai",
response=original_exception.response,
response=httpx.Response(
status_code=429,
request=httpx.Request(
method="POST",
url=" https://cloud.google.com/vertex-ai/",
),
),
)
elif (
"429 Quota exceeded" in error_str
@@ -8351,13 +8359,20 @@ def chunk_creator(self, chunk):
completion_obj["content"] = chunk.text
elif self.custom_llm_provider and (self.custom_llm_provider == "vertex_ai"):
try:
# print(chunk)
if hasattr(chunk, "text"):
# vertexAI chunks return
# MultiCandidateTextGenerationResponse(text=' ```python\n# This Python code says "Hi" 100 times.\n\n# Create', _prediction_response=Prediction(predictions=[{'candidates': [{'content': ' ```python\n# This Python code says "Hi" 100 times.\n\n# Create', 'author': '1'}], 'citationMetadata': [{'citations': None}], 'safetyAttributes': [{'blocked': False, 'scores': None, 'categories': None}]}], deployed_model_id='', model_version_id=None, model_resource_name=None, explanations=None), is_blocked=False, safety_attributes={}, candidates=[ ```python
# This Python code says "Hi" 100 times.
# Create])
completion_obj["content"] = chunk.text
if hasattr(chunk, "candidates") == True:
try:
completion_obj["content"] = chunk.text
if hasattr(chunk.candidates[0], "finish_reason"):
model_response.choices[
0
].finish_reason = map_finish_reason(
chunk.candidates[0].finish_reason.name
)
except:
if chunk.candidates[0].finish_reason.name == "SAFETY":
raise Exception(
f"The response was blocked by VertexAI. {str(chunk)}"
)
else:
completion_obj["content"] = str(chunk)
except StopIteration as e:
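
The reworked branch prefers chunk.candidates so it can propagate a finish reason and surface SAFETY blocks instead of silently stringifying the chunk. A self-contained sketch of that defensive extraction (the dataclasses below are fakes standing in for the Vertex SDK chunk types):

from dataclasses import dataclass, field
from typing import List


@dataclass
class FakeFinishReason:
    name: str


@dataclass
class FakeCandidate:
    finish_reason: FakeFinishReason


@dataclass
class FakeChunk:
    text: str = ""
    candidates: List[FakeCandidate] = field(default_factory=list)


def extract_delta(chunk) -> dict:
    # Pull content and the raw finish reason out of a Vertex-style streaming chunk.
    delta = {"content": "", "finish_reason": None}
    if getattr(chunk, "candidates", None):
        finish = chunk.candidates[0].finish_reason.name
        if finish == "SAFETY":
            raise Exception(f"The response was blocked by VertexAI. {chunk}")
        delta["content"] = chunk.text
        delta["finish_reason"] = finish  # would be normalized via map_finish_reason upstream
    else:
        delta["content"] = str(chunk)
    return delta


print(extract_delta(FakeChunk(text="hello", candidates=[FakeCandidate(FakeFinishReason("STOP"))])))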
@@ -8646,7 +8661,6 @@ async def __anext__(self):
or self.custom_llm_provider == "ollama_chat"
or self.custom_llm_provider == "vertex_ai"
):
print_verbose(f"INSIDE ASYNC STREAMING!!!")
print_verbose(
f"value of async completion stream: {self.completion_stream}"
)
