fix: dummy client to output tokens and random responses (#106)
lorr1 authored Jul 2, 2023
1 parent b775d15 commit 49f5195
Showing 4 changed files with 502 additions and 167 deletions.
167 changes: 98 additions & 69 deletions manifest/clients/dummy.py
@@ -1,6 +1,10 @@
"""Dummy client."""
import hashlib
import logging
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import tiktoken

from manifest.clients.client import Client
from manifest.request import LMChatRequest, LMRequest, LMScoreRequest, Request
@@ -14,7 +18,13 @@ class DummyClient(Client):

# User param -> (client param, default value)
PARAMS = {
"n": ("num_results", 1),
"engine": ("model", "text-davinci-003"),
"temperature": ("temperature", 0.0),
"max_tokens": ("max_tokens", 10),
"n": ("n", 1),
"top_p": ("top_p", 1.0),
"top_k": ("best_of", 1),
"batch_size": ("batch_size", 20),
}
REQUEST_CLS = LMRequest
NAME = "dummy"
@@ -33,6 +43,9 @@ def connect(
connection_str: connection string.
client_args: client arguments.
"""
# We use tiktoken as it is faster than HF for tokenizing
# Use any model to create the tokenizer
self.encoder = tiktoken.get_encoding("cl100k_base")
for key in self.PARAMS:
setattr(self, key, client_args.pop(key, self.PARAMS[key][1]))

@@ -74,7 +87,65 @@ def get_model_params(self) -> Dict:
Returns:
model params.
"""
return {"engine": "dummy"}
return {"engine": "dummy", "model": getattr(self, "engine")}

def get_mock_output(
self, output_toks: int, is_completion: bool, seed: Optional[int] = None
) -> LMModelChoice:
"""Return mock model output by generating random tokens."""
np.random.seed(seed)
random_tokens = np.random.randint(
0, self.encoder.max_token_value + 1, output_toks
)
response = self.encoder.decode(random_tokens) # type: ignore
if is_completion:
np.random.seed(seed)
random_logprobs = np.random.uniform(
low=-2, high=-0.00001, size=output_toks
).tolist()
else:
# Return all Nones to mimic chat models
# OpenAI chat models do not return logprobs
random_logprobs = [None] * output_toks
return LMModelChoice(
text=response,
token_logprobs=random_logprobs,
tokens=random_tokens.tolist(),
)

def get_mock_choices(
self,
prompt_list: List[str],
request_params: Dict,
is_completion: bool,
) -> Tuple[List[LMModelChoice], List[Usage]]:
"""Get choices and usages of mock output."""
choices = []
usages = []
for prompt in prompt_list:
num_prompt_tokens = len(self.encoder.encode(prompt))
if request_params["temperature"] == 0:
# Get integer seed from hash of prompt
seed = (
int(hashlib.sha256(prompt.encode("utf-8")).hexdigest(), 16)
% 10**8
)
else:
# Get random seed
seed = None
for _ in range(int(request_params["n"])):
choice = self.get_mock_output(
request_params["max_tokens"], is_completion=is_completion, seed=seed
)
choices.append(choice)
usages.append(
Usage(
prompt_tokens=num_prompt_tokens,
completion_tokens=request_params["max_tokens"],
total_tokens=num_prompt_tokens + request_params["max_tokens"],
)
)
return choices, usages
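
The determinism trick above: when temperature is 0, the prompt itself is hashed into a numpy seed, so the "random" tokens are reproducible per prompt. A small standalone check of that derivation (the 0-100 token range is illustrative; the client draws from the tokenizer's full vocabulary):

import hashlib

import numpy as np

prompt = "What is a dummy client?"

# Same derivation as get_mock_choices: sha256 of the prompt reduced mod 10**8.
seed = int(hashlib.sha256(prompt.encode("utf-8")).hexdigest(), 16) % 10**8

np.random.seed(seed)
first = np.random.randint(0, 100, 5)
np.random.seed(seed)
second = np.random.randint(0, 100, 5)
assert (first == second).all()  # same prompt, same seed, same mock tokens
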

def run_request(self, request: Request) -> Response:
"""
@@ -88,32 +159,19 @@ def run_request(self, request: Request) -> Response:
request parameters as dict.
"""
if isinstance(request.prompt, list):
num_results = len(request.prompt)
prompt_list = request.prompt
else:
num_results = 1
prompt_list = [request.prompt]
request_params = request.to_dict(self.PARAMS)

choices, usages = self.get_mock_choices(
prompt_list, request_params, is_completion=True
)
return Response(
response=ModelChoices(
choices=[LMModelChoice(text="hello")] # type: ignore
* int(request_params["num_results"])
* num_results
),
response=ModelChoices(choices=choices), # type: ignore
cached=False,
request=request,
usages=Usages(
usages=[
Usage(
**{
"prompt_tokens": 1,
"completion_tokens": 1,
"total_tokens": 2,
}
)
]
* int(request_params["num_results"])
* num_results
),
usages=Usages(usages=usages),
response_type="text",
request_type=self.REQUEST_CLS,
)
@@ -145,35 +203,17 @@ def run_chat_request(
Returns:
response.
"""
num_results = 1
response_dict = {
"choices": [
{
"text": request.prompt[0]["content"],
}
for i in range(num_results)
]
}
prompt_list = ["_".join(pmp["content"] for pmp in request.prompt)]
request_params = request.to_dict(self.PARAMS)

choices, usages = self.get_mock_choices(
prompt_list, request_params, is_completion=False
)
return Response(
response=ModelChoices(
choices=[
LMModelChoice(**choice) # type: ignore
for choice in response_dict["choices"]
]
),
response=ModelChoices(choices=choices), # type: ignore
cached=False,
request=request,
usages=Usages(
usages=[
Usage(
**{
"prompt_tokens": 1,
"completion_tokens": 1,
"total_tokens": 2,
}
)
]
),
usages=Usages(usages=usages),
response_type="text",
request_type=LMChatRequest,
)
@@ -193,30 +233,19 @@ def run_score_prompt_request(
request parameters as dict.
"""
if isinstance(request.prompt, list):
num_results = len(request.prompt)
prompt_list = request.prompt
else:
num_results = 1
response_dict = {
"choices": [
{
"text": request.prompt
if isinstance(request.prompt, str)
else request.prompt[i],
"token_logprobs": [0.3],
}
for i in range(num_results)
]
}
prompt_list = [request.prompt]
request_params = request.to_dict(self.PARAMS)

choices, usages = self.get_mock_choices(
prompt_list, request_params, is_completion=True
)
return Response(
response=ModelChoices(
choices=[
LMModelChoice(**choice) # type: ignore
for choice in response_dict["choices"]
]
),
response=ModelChoices(choices=choices), # type: ignore
cached=False,
request=request,
usages=None,
usages=Usages(usages=usages),
response_type="text",
request_type=LMScoreRequest,
)
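
Taken together, the dummy client now behaves much more like a real LM backend: token counts come from tiktoken and responses are seeded random text rather than a fixed "hello". A usage sketch, assuming the usual Manifest entry point and that extra constructor kwargs are passed through to connect() as client args (neither is shown in this diff):

from manifest import Manifest

# Client args are popped by DummyClient.connect according to PARAMS.
manifest = Manifest(client_name="dummy", temperature=0.0, max_tokens=8)

# With temperature 0 the seed is derived from the prompt, so repeated runs of
# the same prompt should return identical random-token text.
first = manifest.run("hello world")
second = manifest.run("hello world")
assert first == second
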
2 changes: 1 addition & 1 deletion manifest/response.py
@@ -53,7 +53,7 @@ class LMModelChoice(BaseModel):
"""Model single completion."""

text: str
token_logprobs: Optional[List[float]] = None
token_logprobs: Optional[List[Optional[float]]] = None
tokens: Optional[List[str]] = None
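
The widened annotation is what lets the chat path above carry per-token None logprobs while the completion path keeps floats, e.g. (values illustrative):

from manifest.response import LMModelChoice

completion_choice = LMModelChoice(
    text="hi there", token_logprobs=[-0.41, -1.2], tokens=["hi", " there"]
)
chat_choice = LMModelChoice(
    text="hi there", token_logprobs=[None, None], tokens=["hi", " there"]
)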

