Skip to content

Commit

Permalink
Add prompt processing tps in oga-bench
Browse files (browse the repository at this point in the history)
Signed-off-by: David Fan <jiafa@microsoft.com>
  • Loading branch information
jiafatom committed Dec 13, 2024
1 parent 63f11db commit eaaaf38
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/turnkeyml/llm/cache.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -24,6 +24,7 @@ class Keys:
MEAN_TOKENS_PER_SECOND = "mean_tokens_per_second"
STD_DEV_TOKENS_PER_SECOND = "std_dev_tokens_per_second"
SECONDS_TO_FIRST_TOKEN = "seconds_to_first_token"
PROMPT_PROCESSING_TOKENS_PER_SECOND = "prompt_processing_tokens_per_second"
STD_DEV_SECONDS_TO_FIRST_TOKEN = "std_dev_seconds_to_first_token"
CHECKPOINT = "checkpoint"
DTYPE = "dtype"
Expand Down
3 changes: 3 additions & 0 deletions src/turnkeyml/llm/tools/ort_genai/oga_bench.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -32,6 +32,7 @@ def __init__(self):

self.status_stats = [
Keys.SECONDS_TO_FIRST_TOKEN,
Keys.PROMPT_PROCESSING_TOKENS_PER_SECOND,
Keys.MEAN_TOKENS_PER_SECOND,
Keys.PROMPT_TOKENS,
]
Expand Down Expand Up @@ -144,9 +145,11 @@ def run(
per_iteration_tokens_per_second.append(model.tokens_per_second)

mean_time_to_first_token = statistics.mean(per_iteration_time_to_first_token)
prompt_processing_tokens_per_second = input_ids_len / mean_time_to_first_token
mean_tokens_per_second = statistics.mean(per_iteration_tokens_per_second)

state.save_stat(Keys.SECONDS_TO_FIRST_TOKEN, mean_time_to_first_token)
state.save_stat(Keys.PROMPT_PROCESSING_TOKENS_PER_SECOND, prompt_processing_tokens_per_second)
state.save_stat(Keys.MEAN_TOKENS_PER_SECOND, mean_tokens_per_second)
state.save_stat(Keys.PROMPT_TOKENS, input_ids_len)

Expand Down

0 comments on commit eaaaf38

Please sign in to comment.