-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.py
111 lines (103 loc) · 3.53 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import json
import os
import time
import shortuuid
from openai import OpenAI
def test(batch_size: int = 10, num_tests: int = 10, use_guidance: bool = False) -> None:
    """Time chat-completion requests with and without JSON-schema guidance.

    Sends ``num_tests`` identical requests to the OpenAI-compatible endpoint
    at ``http://localhost:8000/v1``, each asking for ``batch_size`` choices
    (``n=batch_size``).  Every choice's raw text is written to
    ``<folder>/attempt_<i>/raw_<j>.json`` and the per-request latencies plus
    their mean go to ``<folder>/report.json``, where ``<folder>`` is
    ``bs_<batch_size>_guidance_<use_guidance>_<random suffix>`` in the current
    working directory.

    Args:
        batch_size: Number of choices requested per call.
        num_tests: Number of sequential requests to time.
        use_guidance: When true, the schema is sent as ``guided_json`` in
            ``extra_body`` in addition to the seed; otherwise only the seed
            is sent.  NOTE(review): ``guided_json`` looks like a
            server-specific extension (presumably vLLM) -- confirm against
            the server in use.
    """
    print(f"###### bs={batch_size}, use_guidance={use_guidance} ######")

    # Single source of truth for the answer schema: used both as the guided
    # decoding constraint and, serialized, inside the system prompt.
    answer_schema = {
        "type": "object",
        "properties": {
            "steps": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "explanation": {"type": "string"},
                        "output": {"type": "string"},
                    },
                    "required": ["explanation", "output"],
                    "additionalProperties": False,
                },
            },
            "final_answer": {"type": "string"},
        },
        "required": ["steps", "final_answer"],
        "additionalProperties": False,
    }
    guidance = {"seed": 1000, "guided_json": answer_schema}
    print(guidance if use_guidance else {})

    # The same seed is sent in both modes so runs stay comparable.
    extra_body = guidance if use_guidance else {"seed": 1000}

    # json.dumps renders real JSON (``false``, not Python's ``False``), so the
    # schema shown to the model is valid and always matches ``guided_json``.
    system_prompt = (
        "You are a helpful math tutor. Format your answers in JSON format, "
        "conform to the following json schema:\n"
        + json.dumps(answer_schema, indent=4)
    )

    client = OpenAI(
        base_url="http://localhost:8000/v1",
        api_key="empty",
    )

    folder_name = f"bs_{batch_size}_guidance_{use_guidance}_{shortuuid.random()}"
    os.makedirs(folder_name, exist_ok=True)

    response_times = []
    for attempt in range(num_tests):
        attempt_dir = os.path.join(folder_name, f"attempt_{attempt}")
        os.makedirs(attempt_dir, exist_ok=True)

        # perf_counter is monotonic, so the interval cannot be skewed by
        # system clock adjustments the way time.time() can.
        start = time.perf_counter()
        completion = client.chat.completions.create(
            model="neuralmagic/Llama-3.2-3B-Instruct-FP8",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            n=batch_size,
            extra_body=extra_body,
        )
        response_time = time.perf_counter() - start
        response_times.append(response_time)
        print(f"Time taken: {response_time}s")

        for index, choice in enumerate(completion.choices):
            # ``content`` can be None; store an empty file rather than
            # crashing mid-benchmark.
            raw_text = choice.message.content or ""
            raw_path = os.path.join(attempt_dir, f"raw_{index}.json")
            with open(raw_path, "w", encoding="utf-8") as f:
                f.write(raw_text)

    # With num_tests == 0 there is nothing to average; record null instead of
    # raising ZeroDivisionError.
    average = sum(response_times) / len(response_times) if response_times else None
    report_dict = {
        "response_times": response_times,
        "average_response_time": average,
    }
    with open(os.path.join(folder_name, "report.json"), "w", encoding="utf-8") as f:
        json.dump(report_dict, f, indent=4)
if __name__ == "__main__":
    # Unguided baseline runs, currently disabled:
    # test(batch_size=1, num_tests=10, use_guidance=False)
    # test(batch_size=30, num_tests=10, use_guidance=False)

    # Guided runs as (batch_size, num_tests): one single-choice request
    # first, then ten requests of thirty choices each.
    guided_runs = ((1, 1), (30, 10))
    for bs, attempts in guided_runs:
        test(batch_size=bs, num_tests=attempts, use_guidance=True)