Update create cserve example (#93)

anandj91 · web-flow · commit e0cb86ef11bd · 2025-03-24T18:23:50.000-04:00
diff --git a/examples/sdk/create_cserve.py b/examples/sdk/create_cserve.py
@@ -1,34 +1,62 @@
-import time
 import centml
 from centml.sdk.api import get_centml_client
-from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest
+from centml.sdk import DeploymentType, CreateCServeV2DeploymentRequest, CServeV2Recipe
 
-with get_centml_client() as cclient:
-    # Get fastest recipe for the Qwen model
-    fastest = cclient.get_cserve_recipe(model="Qwen/Qwen2-VL-7B-Instruct")[0].fastest
 
-    # Modify the recipe if necessary
-    fastest.recipe.additional_properties["max_num_seqs"] = 512
+def get_fastest_cserve_config(cclient, name, model):  # request built from the `fastest` recipe for `model`
+    fastest = cclient.get_cserve_recipe(model=model)[0].fastest  # first returned entry's `fastest` variant
 
-    # Create CServeV2 deployment
-    request = CreateCServeV2DeploymentRequest(
-        name="qwen-fastest",
+    return CreateCServeV2DeploymentRequest(
+        name=name,
         cluster_id=cclient.get_cluster_id(fastest.hardware_instance_id),
         hardware_instance_id=fastest.hardware_instance_id,
         recipe=fastest.recipe,
         min_scale=1,
         max_scale=1,
         env_vars={},
     )
-    response = cclient.create_cserve(request)
-    print("Create deployment response: ", response)
 
-    # Get deployment details
-    deployment = cclient.get_cserve(response.id)
-    print("Deployment details: ", deployment)
 
-    # Pause the deployment
-    cclient.pause(deployment.id)
+def get_default_cserve_config(cclient, name, model, cluster_id=1001):
+    """Build a default-recipe request on the first hardware instance of `cluster_id` (default 1001)."""
+    default_recipe = CServeV2Recipe(model=model)
 
-    # Delete the deployment
-    cclient.delete(deployment.id)
+    hardware_instance = cclient.get_hardware_instances(cluster_id=cluster_id)[0]
+    return CreateCServeV2DeploymentRequest(
+        name=name,
+        cluster_id=hardware_instance.cluster_id,
+        hardware_instance_id=hardware_instance.id,
+        recipe=default_recipe,
+        min_scale=1,
+        max_scale=1,
+        env_vars={},
+    )
+
+
+def main():
+    """Create a CServeV2 deployment for the Qwen model and print its details."""
+    with get_centml_client() as cclient:
+        ### Get the configurations for the Qwen model
+        qwen_config = get_fastest_cserve_config(cclient, name="qwen-fastest", model="Qwen/Qwen2-VL-7B-Instruct")
+        # Alternative: build the request from a default recipe instead.
+        # qwen_config = get_default_cserve_config(cclient, name="qwen-default", model="Qwen/Qwen2-VL-7B-Instruct")
+
+        ### Modify the recipe if necessary
+        qwen_config.recipe.additional_properties["max_num_seqs"] = 512
+
+        ### Create CServeV2 deployment
+        response = cclient.create_cserve(qwen_config)
+        print("Create deployment response: ", response)
+
+        ### Get deployment details
+        deployment = cclient.get_cserve(response.id)
+        print("Deployment details: ", deployment)
+
+        # Optional cleanup, left disabled; uncomment the calls below to run it.
+        # Pause the deployment:
+        #     cclient.pause(deployment.id)
+        # Delete the deployment:
+        #     cclient.delete(deployment.id)
+
+if __name__ == "__main__":  # run the example only when executed as a script, not on import
+    main()