Skip to content

Commit

Permalink
fix: encoding for local llm. OpenBMB#100
Browse files Browse the repository at this point in the history
  • Loading branch information
chenweize1998 committed Nov 12, 2023
1 parent 2557283 commit abacf5a
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion agentverse/llms/utils/token_counter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@


def count_string_tokens(prompt: str = "", model: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens in *prompt* for the given *model*.

    Models listed in ``LOCAL_LLMS`` are tokenized with the Hugging Face
    tokenizer named by ``LOCAL_LLMS_MAPPING``. Every other model, including
    the ``gpt-3.5-turbo`` and ``gpt-4`` families, is tokenized with tiktoken,
    so a model is never silently answered with ``None``.

    Args:
        prompt: Text to tokenize.
        model: Model name used to select the tokenizer.

    Returns:
        The length of the encoded token sequence.

    Raises:
        Whatever ``tiktoken.encoding_for_model`` raises for a model name that
        is neither local nor recognized by tiktoken.
    """
    key = model.lower()
    # OpenAI chat models keep priority over the local-model table.
    is_openai = model.startswith(("gpt-3.5-turbo", "gpt-4"))
    if not is_openai and (key in LOCAL_LLMS or model in LOCAL_LLMS):
        # Function-scope import: transformers is only loaded when a local
        # model is actually requested.
        from transformers import AutoTokenizer

        # The membership test accepts either spelling of the name, so the
        # mapping lookup must accept both as well to avoid a KeyError.
        hf_name = (
            LOCAL_LLMS_MAPPING[key]
            if key in LOCAL_LLMS_MAPPING
            else LOCAL_LLMS_MAPPING[model]
        )
        tokenizer = AutoTokenizer.from_pretrained(hf_name)
        return len(tokenizer.encode(prompt))
    # Default path: let tiktoken resolve the encoding (and reject names it
    # does not know) instead of falling off the end and returning None.
    return len(tiktoken.encoding_for_model(model).encode(prompt))


def count_message_tokens(
Expand Down

0 comments on commit abacf5a

Please sign in to comment.