openai compatible api usage and id #9800

Merged 2 commits on Oct 24, 2024
api/core/model_runtime/entities/llm_entities.py (1 change: 1 addition & 0 deletions)
@@ -105,6 +105,7 @@ class LLMResult(BaseModel):
     Model class for llm result.
     """
 
+    id: Optional[str] = None
     model: str
     prompt_messages: list[PromptMessage]
     message: AssistantPromptMessage
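The new id field is typed Optional[str] with a default of None, so LLMResult objects built by providers that never report a message identifier keep validating exactly as before. A minimal sketch of both cases, using a stand-in pydantic model rather than the real entity (names and values are invented for illustration):

from typing import Optional

from pydantic import BaseModel


class LLMResultStub(BaseModel):
    # Stand-in for the real LLMResult; only the fields relevant to this change.
    id: Optional[str] = None
    model: str


# The upstream API returned an identifier: it is carried through unchanged.
assert LLMResultStub(id="chatcmpl-123", model="gpt-4o-mini").id == "chatcmpl-123"

# The upstream API returned nothing: the field stays None and validation still passes.
assert LLMResultStub(model="local-model").id is None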
@@ -397,16 +397,21 @@ def _handle_generate_stream_response(
         chunk_index = 0
 
         def create_final_llm_result_chunk(
-            index: int, message: AssistantPromptMessage, finish_reason: str
+            id: Optional[str], index: int, message: AssistantPromptMessage, finish_reason: str, usage: dict
         ) -> LLMResultChunk:
             # calculate num tokens
-            prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
-            completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
+            prompt_tokens = usage and usage.get("prompt_tokens")
+            if prompt_tokens is None:
+                prompt_tokens = self._num_tokens_from_string(model, prompt_messages[0].content)
+            completion_tokens = usage and usage.get("completion_tokens")
+            if completion_tokens is None:
+                completion_tokens = self._num_tokens_from_string(model, full_assistant_content)
 
             # transform usage
             usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens)
 
             return LLMResultChunk(
+                id=id,
                 model=model,
                 prompt_messages=prompt_messages,
                 delta=LLMResultChunkDelta(index=index, message=message, finish_reason=finish_reason, usage=usage),
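With the extra usage parameter, the final chunk now prefers the token counts reported by the provider and only falls back to the local tokenizer estimate when they are absent. A standalone sketch of the same fallback pattern (the helper name and sample numbers are invented for illustration):

from typing import Optional


def resolve_prompt_tokens(usage: Optional[dict], estimate: int) -> int:
    # Mirrors the fallback in the diff: when usage is None or does not contain
    # the key, prompt_tokens ends up None and the local estimate is used instead.
    prompt_tokens = usage and usage.get("prompt_tokens")
    if prompt_tokens is None:
        prompt_tokens = estimate
    return prompt_tokens


assert resolve_prompt_tokens(None, 42) == 42                      # provider sent no usage block
assert resolve_prompt_tokens({"completion_tokens": 7}, 42) == 42  # usage present, key missing
assert resolve_prompt_tokens({"prompt_tokens": 17}, 42) == 17     # provider-reported count wins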
@@ -450,7 +455,7 @@ def get_tool_call(tool_call_id: str):
                     tool_call.function.arguments += new_tool_call.function.arguments
 
         finish_reason = None  # The default value of finish_reason is None
-
+        message_id, usage = None, None
         for chunk in response.iter_lines(decode_unicode=True, delimiter=delimiter):
             chunk = chunk.strip()
             if chunk:
@@ -462,20 +467,26 @@ def get_tool_call(tool_call_id: str):
                     continue
 
                 try:
-                    chunk_json = json.loads(decoded_chunk)
+                    chunk_json: dict = json.loads(decoded_chunk)
                 # stream ended
                 except json.JSONDecodeError as e:
                     yield create_final_llm_result_chunk(
+                        id=message_id,
                         index=chunk_index + 1,
                         message=AssistantPromptMessage(content=""),
                         finish_reason="Non-JSON encountered.",
+                        usage=usage,
                     )
                     break
+                if chunk_json:
+                    if u := chunk_json.get("usage"):
+                        usage = u
                 if not chunk_json or len(chunk_json["choices"]) == 0:
                     continue
 
                 choice = chunk_json["choices"][0]
                 finish_reason = chunk_json["choices"][0].get("finish_reason")
+                message_id = chunk_json.get("id")
                 chunk_index += 1
 
                 if "delta" in choice:
@@ -524,6 +535,7 @@ def get_tool_call(tool_call_id: str):
                     continue
 
                 yield LLMResultChunk(
+                    id=message_id,
                     model=model,
                     prompt_messages=prompt_messages,
                     delta=LLMResultChunkDelta(
@@ -536,6 +548,7 @@ def get_tool_call(tool_call_id: str):
 
         if tools_calls:
             yield LLMResultChunk(
+                id=message_id,
                 model=model,
                 prompt_messages=prompt_messages,
                 delta=LLMResultChunkDelta(
@@ -545,17 +558,22 @@ def get_tool_call(tool_call_id: str):
             )
 
         yield create_final_llm_result_chunk(
-            index=chunk_index, message=AssistantPromptMessage(content=""), finish_reason=finish_reason
+            id=message_id,
+            index=chunk_index,
+            message=AssistantPromptMessage(content=""),
+            finish_reason=finish_reason,
+            usage=usage,
         )
 
     def _handle_generate_response(
         self, model: str, credentials: dict, response: requests.Response, prompt_messages: list[PromptMessage]
     ) -> LLMResult:
-        response_json = response.json()
+        response_json: dict = response.json()
 
         completion_type = LLMMode.value_of(credentials["mode"])
 
         output = response_json["choices"][0]
+        message_id = response_json.get("id")
 
         response_content = ""
         tool_calls = None
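In the blocking path the identifier sits at the top level of the completion body, alongside choices and usage. A typical OpenAI-style chat response, with values invented for illustration:

response_json = {
    "id": "chatcmpl-def456",            # read via response_json.get("id") above
    "object": "chat.completion",
    "model": "gpt-4o-mini",
    "choices": [
        {
            "index": 0,
            "message": {"role": "assistant", "content": "Hi there!"},
            "finish_reason": "stop",
        },
    ],
    "usage": {"prompt_tokens": 9, "completion_tokens": 3, "total_tokens": 12},
}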
@@ -593,6 +611,7 @@ def _handle_generate_response(
 
         # transform response
         result = LLMResult(
+            id=message_id,
             model=response_json["model"],
             prompt_messages=prompt_messages,
             message=assistant_message,