Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

LiteLLM Minor Fixes and Improvements (09/10/2024) #5618

Merged
merged 11 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions litellm/cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,18 +829,11 @@ def response_cost_calculator(
)
return None
except Exception as e:
if litellm.suppress_debug_info: # allow cli tools to suppress this information.
verbose_logger.debug(
"litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
str(e), traceback.format_exc()
)
)
else:
verbose_logger.warning(
"litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
str(e), traceback.format_exc()
)
verbose_logger.debug(
"litellm.cost_calculator.py::response_cost_calculator - Returning None. Exception occurred - {}/n{}".format(
str(e), traceback.format_exc()
)
)
return None


Expand Down
9 changes: 5 additions & 4 deletions litellm/integrations/slack_alerting.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,10 +263,11 @@ async def _add_langfuse_trace_id_to_alert(
break
await asyncio.sleep(3) # wait 3s before retrying for trace id

if litellm.litellm_core_utils.litellm_logging.langFuseLogger is not None:
base_url = (
litellm.litellm_core_utils.litellm_logging.langFuseLogger.Langfuse.base_url
)
_langfuse_object = litellm_logging_obj._get_callback_object(
service_name="langfuse"
)
if _langfuse_object is not None:
base_url = _langfuse_object.Langfuse.base_url
return f"{base_url}/trace/{trace_id}"
return None

Expand Down
37 changes: 35 additions & 2 deletions litellm/litellm_core_utils/litellm_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,7 @@ def success_handler(
else:
print_verbose("reaches langfuse for streaming logging!")
result = kwargs["complete_streaming_response"]
temp_langfuse_logger = langFuseLogger
if langFuseLogger is None or (
(
self.langfuse_public_key is not None
Expand All @@ -940,12 +941,12 @@ def success_handler(
and self.langfuse_host != langFuseLogger.langfuse_host
)
):
langFuseLogger = LangFuseLogger(
temp_langfuse_logger = LangFuseLogger(
langfuse_public_key=self.langfuse_public_key,
langfuse_secret=self.langfuse_secret,
langfuse_host=self.langfuse_host,
)
_response = langFuseLogger.log_event(
_response = temp_langfuse_logger.log_event(
kwargs=kwargs,
response_obj=result,
start_time=start_time,
Expand Down Expand Up @@ -1925,6 +1926,38 @@ def _get_trace_id(self, service_name: Literal["langfuse"]) -> Optional[str]:

return trace_id

def _get_callback_object(self, service_name: Literal["langfuse"]) -> Optional[Any]:
    """
    Return the dynamic callback object for the given logging service.

    Meant to solve issue when doing key-based/team-based logging: if this
    request carries its own langfuse credentials/host that differ from the
    global ``langFuseLogger``, build and return a fresh ``LangFuseLogger``
    bound to those credentials instead of reusing the global one.

    Parameters:
        service_name: the callback service to resolve; only "langfuse"
            is supported.

    Returns:
        A LangFuseLogger instance (fresh or the global one), or None when
        service_name is not recognized.
    """
    global langFuseLogger

    if service_name == "langfuse":
        if langFuseLogger is None or (
            (
                self.langfuse_public_key is not None
                and self.langfuse_public_key != langFuseLogger.public_key
            )
            or (
                # BUG FIX: the original duplicated the public_key comparison
                # here; the second branch must compare the *secret* so a
                # request with a different secret gets its own logger.
                # NOTE(review): assumes LangFuseLogger stores the secret as
                # `.secret_key` (mirroring `.public_key`) — confirm against
                # the LangFuseLogger constructor.
                self.langfuse_secret is not None
                and self.langfuse_secret != langFuseLogger.secret_key
            )
            or (
                self.langfuse_host is not None
                and self.langfuse_host != langFuseLogger.langfuse_host
            )
        ):
            # Credentials/host differ from the cached global logger:
            # return a request-scoped logger (do NOT overwrite the global).
            return LangFuseLogger(
                langfuse_public_key=self.langfuse_public_key,
                langfuse_secret=self.langfuse_secret,
                langfuse_host=self.langfuse_host,
            )
        return langFuseLogger

    return None


def set_callbacks(callback_list, function_id=None):
"""
Expand Down
25 changes: 24 additions & 1 deletion litellm/llms/databricks/cost_calculator.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,30 @@ def cost_per_token(model: str, usage: Usage) -> Tuple[float, float]:
"dbrx-instruct"
):
base_model = "databricks-dbrx-instruct"

elif model.startswith("databricks/meta-llama-3.1-70b-instruct") or model.startswith(
"meta-llama-3.1-70b-instruct"
):
base_model = "databricks-meta-llama-3-1-70b-instruct"
elif model.startswith(
"databricks/meta-llama-3.1-405b-instruct"
) or model.startswith("meta-llama-3.1-405b-instruct"):
base_model = "databricks-meta-llama-3-1-405b-instruct"
elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
"mixtral-8x7b-instruct-v0.1"
):
base_model = "databricks-mixtral-8x7b-instruct"
elif model.startswith("databricks/mixtral-8x7b-instruct-v0.1") or model.startswith(
"mixtral-8x7b-instruct-v0.1"
):
base_model = "databricks-mixtral-8x7b-instruct"
elif model.startswith("databricks/bge-large-en") or model.startswith(
"bge-large-en"
):
base_model = "databricks-bge-large-en"
elif model.startswith("databricks/gte-large-en") or model.startswith(
"gte-large-en"
):
base_model = "databricks-gte-large-en"
## GET MODEL INFO
model_info = get_model_info(model=base_model, custom_llm_provider="databricks")

Expand Down
16 changes: 11 additions & 5 deletions litellm/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5310,7 +5310,7 @@ def stream_chunk_builder(
]

if len(tool_call_chunks) > 0:
argument_list = []
argument_list: List = []
delta = tool_call_chunks[0]["choices"][0]["delta"]
message = response["choices"][0]["message"]
message["tool_calls"] = []
Expand All @@ -5319,6 +5319,7 @@ def stream_chunk_builder(
type = None
tool_calls_list = []
prev_index = None
prev_name = None
prev_id = None
curr_id = None
curr_index = 0
Expand Down Expand Up @@ -5346,27 +5347,32 @@ def stream_chunk_builder(
type = tool_calls[0].type
if prev_index is None:
prev_index = curr_index
if prev_name is None:
prev_name = name
if curr_index != prev_index: # new tool call
combined_arguments = "".join(argument_list)
tool_calls_list.append(
{
"id": prev_id,
"index": prev_index,
"function": {"arguments": combined_arguments, "name": name},
"function": {
"arguments": combined_arguments,
"name": prev_name,
},
"type": type,
}
)
argument_list = [] # reset
prev_index = curr_index
prev_id = curr_id
prev_name = name

combined_arguments = (
"".join(argument_list) or "{}"
) # base case, return empty dict

tool_calls_list.append(
{
"id": id,
"index": curr_index,
"function": {"arguments": combined_arguments, "name": name},
"type": type,
}
Expand Down Expand Up @@ -5422,7 +5428,7 @@ def stream_chunk_builder(
for choice in choices:
delta = choice.get("delta", {})
content = delta.get("content", "")
if content == None:
if content is None:
continue # openai v1.0.0 sets content = None for chunks
content_list.append(content)

Expand Down
95 changes: 67 additions & 28 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -5459,90 +5459,129 @@
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.000005,
"output_cost_per_token": 0.000015,
"input_dbu_cost_per_token": 0.000071429,
"output_cost_per_token": 0.00001500002,
"output_dbu_cost_per_token": 0.000214286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-meta-llama-3-1-70b-instruct": {
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000003,
"input_cost_per_token": 0.00000100002,
"input_dbu_cost_per_token": 0.000014286,
"output_cost_per_token": 0.00000299999,
"output_dbu_cost_per_token": 0.000042857,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-dbrx-instruct": {
"max_tokens": 32768,
"max_input_tokens": 32768,
"max_output_tokens": 32768,
"input_cost_per_token": 0.00000075,
"output_cost_per_token": 0.00000225,
"input_cost_per_token": 0.00000074998,
"input_dbu_cost_per_token": 0.000010714,
"output_cost_per_token": 0.00000224901,
"output_dbu_cost_per_token": 0.000032143,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-meta-llama-3-70b-instruct": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000003,
"max_tokens": 128000,
"max_input_tokens": 128000,
"max_output_tokens": 128000,
"input_cost_per_token": 0.00000100002,
"input_dbu_cost_per_token": 0.000014286,
"output_cost_per_token": 0.00000299999,
"output_dbu_cost_per_token": 0.000042857,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-llama-2-70b-chat": {
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"input_cost_per_token": 0.00000050001,
"input_dbu_cost_per_token": 0.000007143,
"output_cost_per_token": 0.0000015,
"output_dbu_cost_per_token": 0.000021429,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"

"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-mixtral-8x7b-instruct": {
"max_tokens": 4096,
"max_input_tokens": 4096,
"max_output_tokens": 4096,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.000001,
"input_cost_per_token": 0.00000050001,
"input_dbu_cost_per_token": 0.000007143,
"output_cost_per_token": 0.00000099902,
"output_dbu_cost_per_token": 0.000014286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-mpt-30b-instruct": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"input_cost_per_token": 0.00000099902,
"input_dbu_cost_per_token": 0.000014286,
"output_cost_per_token": 0.00000099902,
"output_dbu_cost_per_token": 0.000014286,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-mpt-7b-instruct": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"max_output_tokens": 8192,
"input_cost_per_token": 0.0000005,
"output_cost_per_token": 0.0000005,
"input_cost_per_token": 0.00000050001,
"input_dbu_cost_per_token": 0.000007143,
"output_cost_per_token": 0.0,
"output_dbu_cost_per_token": 0.0,
"litellm_provider": "databricks",
"mode": "chat",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-bge-large-en": {
"max_tokens": 512,
"max_input_tokens": 512,
"output_vector_size": 1024,
"input_cost_per_token": 0.0000001,
"input_cost_per_token": 0.00000010003,
"input_dbu_cost_per_token": 0.000001429,
"output_cost_per_token": 0.0,
"output_dbu_cost_per_token": 0.0,
"litellm_provider": "databricks",
"mode": "embedding",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
},
"databricks/databricks-gte-large-en": {
"max_tokens": 8192,
"max_input_tokens": 8192,
"output_vector_size": 1024,
"input_cost_per_token": 0.00000012999,
"input_dbu_cost_per_token": 0.000001857,
"output_cost_per_token": 0.0,
"output_dbu_cost_per_token": 0.0,
"litellm_provider": "databricks",
"mode": "embedding",
"source": "https://www.databricks.com/product/pricing/foundation-model-serving"
"source": "https://www.databricks.com/product/pricing/foundation-model-serving",
"metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."}
}
}
12 changes: 6 additions & 6 deletions litellm/proxy/_new_secret_config.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
model_list:
- model_name: "gpt-turbo"
- model_name: "gpt-4o"
litellm_params:
model: azure/chatgpt-v-2
api_key: os.environ/AZURE_API_KEY
api_base: os.environ/AZURE_API_BASE
model: gpt-4o

router_settings:
model_group_alias: {"gpt-4": "gpt-turbo"}
litellm_settings:
cache: true
cache_params:
type: local
Loading