|
1 |
| -import time |
2 | 1 | import centml
|
3 | 2 | from centml.sdk.api import get_centml_client
|
4 |
| -from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest |
| 3 | +from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe |
5 | 4 |
|
6 |
| -with get_centml_client() as cclient: |
7 |
| - # Get fastest recipe for the Qwen model |
8 |
| - fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest |
9 | 5 |
|
10 |
| - # Modify the recipe if necessary |
11 |
| - fastest.recipe.additional_properties["max_num_seqs"] = 512 |
def get_fastest_cserve_config(cclient, name, model):
    """Build a CServeV2 deployment request from the ``fastest`` recipe of *model*.

    The first entry returned by ``cclient.get_cserve_recipe(model=model)`` is
    used, and its ``fastest`` configuration supplies both the hardware
    instance and the recipe.

    Args:
        cclient: CentML client; must provide ``get_cserve_recipe`` and
            ``get_cluster_id``.
        name: Name given to the deployment.
        model: Model identifier forwarded to ``get_cserve_recipe``.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If no recipe is returned for ``model``.
    """
    recipes = cclient.get_cserve_recipe(model=model)
    try:
        first = recipes[0]
    except IndexError:
        # Same exception type a bare ``[0]`` would raise, but the message
        # says which model had no recipe.
        raise IndexError(
            f"no CServe recipe available for model {model!r}"
        ) from None
    fastest = first.fastest

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
        hardware_instance_id=fastest.hardware_instance_id,
        recipe=fastest.recipe,
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
|
23 |
| - response = cclient.create_cserve(request) |
24 |
| - print("Create deployment response: ", response) |
25 | 18 |
|
26 |
| - # Get deployment details |
27 |
| - deployment = cclient.get_cserve(response.id) |
28 |
| - print("Deployment details: ", deployment) |
29 | 19 |
|
30 |
| - # Pause the deployment |
31 |
| - cclient.pause(deployment.id) |
def get_default_cserve_config(cclient, name, model, *, cluster_id=1001):
    """Build a CServeV2 deployment request using a default recipe for *model*.

    The recipe is ``CServeV2Recipe(model=model)`` with no further settings,
    and the deployment is placed on the first hardware instance that
    ``cclient.get_hardware_instances`` returns for ``cluster_id``.

    Args:
        cclient: CentML client; must provide ``get_hardware_instances``.
        name: Name given to the deployment.
        model: Model identifier used to construct the recipe.
        cluster_id: Cluster whose hardware instances are queried. Defaults
            to 1001, the value this helper previously hard-coded.

    Returns:
        A ``CreateCServeV2DeploymentRequest`` with ``min_scale`` and
        ``max_scale`` both set to 1 and no environment variables.

    Raises:
        IndexError: If the cluster reports no hardware instances.
    """
    # Look up the hardware first so an unusable cluster is reported before
    # any request objects are built.
    instances = cclient.get_hardware_instances(cluster_id=cluster_id)
    try:
        hardware_instance = instances[0]
    except IndexError:
        # Same exception type a bare ``[0]`` would raise, but the message
        # says which cluster was empty.
        raise IndexError(
            f"no hardware instances available in cluster {cluster_id}"
        ) from None

    return CreateCServeV2DeploymentRequest(
        name=name,
        cluster_id=hardware_instance.cluster_id,
        hardware_instance_id=hardware_instance.id,
        recipe=CServeV2Recipe(model=model),
        min_scale=1,
        max_scale=1,
        env_vars={},
    )
| 34 | + |
| 35 | + |
def main():
    """Create a CServeV2 deployment for the Qwen model and print its details."""
    with get_centml_client() as cclient:
        # Build the deployment request for the Qwen model.
        request = get_fastest_cserve_config(
            cclient,
            name="qwen-fastest",
            model="Qwen/Qwen2-VL-7B-Instruct",
        )
        # Alternative: start from a default recipe instead.
        # request = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct")

        # Adjust the recipe before submitting, if necessary.
        request.recipe.additional_properties["max_num_seqs"] = 512

        # Submit the CServeV2 deployment.
        created = cclient.create_cserve(request)
        print("Create deployment response: ", created)

        # Read back the deployment details.
        details = cclient.get_cserve(created.id)
        print("Deployment details: ", details)

        # Pausing and deleting are left disabled; uncomment to run them:
        #   cclient.pause(details.id)
        #   cclient.delete(details.id)


if __name__ == "__main__":
    main()
0 commit comments