Skip to content

Commit e0cb86e

Browse files
authored
Update create cserve example (#93)
1 parent 275b511 commit e0cb86e

File tree

1 file changed

+47
-19
lines changed

1 file changed

+47
-19
lines changed

examples/sdk/create_cserve.py

Lines changed: 47 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -1,34 +1,62 @@
1-
import time
21
import centml
32
from centml.sdk.api import get_centml_client
4-
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
3+
from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
54

6-
with get_centml_client() as cclient:
7-
# Get fastest recipe for the Qwen model
8-
fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest
95

10-
# Modify the recipe if necessary
11-
fastest.recipe.additional_properties["max_num_seqs"] = 512
6+
def get_fastest_cserve_config(cclient, name, model):
    """Build a CServeV2 deployment request from the fastest recipe for *model*.

    Args:
        cclient: CentML client; must provide ``get_cserve_recipe`` and
            ``get_cluster_id``.
        name: Name to give the deployment.
        model: Model identifier passed to ``get_cserve_recipe``.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` built from the ``fastest`` entry
        of the first recipe returned for *model*, with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If no recipes are returned for *model*.
    """
    recipes = cclient.get_cserve_recipe(model=model)
    if not recipes:
        # Same exception type a bare ``[0]`` would raise, but the message
        # tells the user which model had no recipes.
        raise IndexError(f"No CServe recipes found for model {model!r}")
    fastest = recipes[0].fastest

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
        hardware_instance_id=fastest.hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
23-
response = cclient.create_cserve(request)
24-
print("Create deployment response: ", response)
2518

26-
# Get deployment details
27-
deployment = cclient.get_cserve(response.id)
28-
print("Deployment details: ", deployment)
2919

30-
# Pause the deployment
31-
cclient.pause(deployment.id)
20+
def get_default_cserve_config(cclient, name, model, cluster_id=1001):
    """Build a CServeV2 deployment request using a default recipe for *model*.

    Args:
        cclient: CentML client; must provide ``get_hardware_instances``.
        name: Name to give the deployment.
        model: Model identifier used to construct the ``CServeV2Recipe``.
        cluster_id: Cluster whose hardware instances are queried. Defaults to
            1001, the value this example previously hard-coded.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` targeting the first hardware
        instance returned for *cluster_id*, with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If the cluster reports no hardware instances.
    """
    # Look up the hardware first so a missing/empty cluster fails before any
    # request objects are built.
    hardware_instances = cclient.get_hardware_instances(cluster_id=cluster_id)
    if not hardware_instances:
        # Same exception type a bare ``[0]`` would raise, with a clearer message.
        raise IndexError(
            f"No hardware instances found for cluster {cluster_id!r}"
        )
    hardware_instance = hardware_instances[0]

    # Only the model is set; every other recipe field keeps whatever default
    # CServeV2Recipe defines.
    default_recipe = CServeV2Recipe(model=model)

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=hardware_instance.cluster_id,
        hardware_instance_id=hardware_instance.id,
        recipe=default_recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
34+
35+
36+
def main(cleanup=False):
    """Create a CServeV2 deployment for the Qwen model and print its details.

    Args:
        cleanup: When true, pause and then delete the deployment after its
            details have been printed. Defaults to ``False``, which leaves
            the deployment in place.
    """
    with get_centml_client() as cclient:
        # Get the configuration for the Qwen model. To start from a default
        # recipe instead of the fastest one, build the request with
        # get_default_cserve_config(cclient, name="qwen-default", model=...).
        qwen_config = get_fastest_cserve_config(
            cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct"
        )

        # Modify the recipe if necessary
        qwen_config.recipe.additional_properties["max_num_seqs"] = 512

        # Create CServeV2 deployment
        response = cclient.create_cserve(qwen_config)
        print("Create deployment response: ", response)

        # Get deployment details
        deployment = cclient.get_cserve(response.id)
        print("Deployment details: ", deployment)

        if cleanup:
            # Pause the deployment
            cclient.pause(deployment.id)

            # Delete the deployment
            cclient.delete(deployment.id)
60+
61+
# Run the example only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)