6
6
DeploymentType ,
7
7
DeploymentStatus ,
8
8
CreateInferenceDeploymentRequest ,
9
+ CreateInferenceV3DeploymentRequest ,
9
10
CreateComputeDeploymentRequest ,
10
11
CreateCServeV2DeploymentRequest ,
11
12
CreateCServeV3DeploymentRequest ,
@@ -30,7 +31,21 @@ def get_status(self, id):
30
31
return self ._api .get_deployment_status_deployments_status_deployment_id_get (id )
31
32
32
33
def get_inference(self, id):
    """Get Inference deployment details, trying the V3 endpoint before V2.

    The V3 getter is called first. If it raises ``ApiException`` with a
    status of 404 or 400, the V2 getter is called with the same id. Any
    other V3 failure propagates unchanged.

    Args:
        id: Deployment identifier, passed straight through to the API client.

    Returns:
        The result of the V3 getter, or of the V2 getter on fallback.

    Raises:
        ApiException: The V3 error, either because its status is not
            404/400 or because the V2 fallback raised ``ApiException`` too.
    """
    try:
        return self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(id)
    except ApiException as v3_error:
        # Only 404/400 are treated as "this may be a V2 deployment";
        # everything else (auth, network, etc.) is re-raised as-is.
        if v3_error.status not in [404, 400]:
            raise
        try:
            return self._api.get_inference_deployment_deployments_inference_deployment_id_get(id)
        except ApiException:
            # Both lookups failed: surface the original V3 error, not the V2 one.
            raise v3_error
34
49
35
50
def get_compute(self, id):
    """Return the compute deployment details for ``id`` from the API client."""
    fetch = self._api.get_compute_deployment_deployments_compute_deployment_id_get
    return fetch(id)
@@ -52,9 +67,15 @@ def get_cserve(self, id):
52
67
# For other errors (auth, network, etc.), raise immediately
53
68
raise
54
69
55
- def create_inference (self , request : CreateInferenceDeploymentRequest ):
70
def create_inference(self, request: CreateInferenceV3DeploymentRequest):
    """Create an inference deployment by posting ``request`` to the V3 create endpoint."""
    submit = self._api.create_inference_v3_deployment_deployments_inference_v3_post
    return submit(request)
72
+
73
def create_inference_v2(self, request: CreateInferenceDeploymentRequest):
    """Create an inference deployment by posting ``request`` to the V2 create endpoint."""
    submit = self._api.create_inference_deployment_deployments_inference_post
    return submit(request)
57
75
76
def create_inference_v3(self, request: CreateInferenceV3DeploymentRequest):
    """Create an inference deployment by posting ``request`` to the V3 create endpoint."""
    submit = self._api.create_inference_v3_deployment_deployments_inference_v3_post
    return submit(request)
78
+
58
79
def create_compute(self, request: CreateComputeDeploymentRequest):
    """Create a compute deployment by posting ``request`` to the compute create endpoint."""
    submit = self._api.create_compute_deployment_deployments_compute_post
    return submit(request)
60
81
@@ -67,8 +88,51 @@ def create_cserve_v2(self, request: CreateCServeV2DeploymentRequest):
67
88
def create_cserve_v3(self, request: CreateCServeV3DeploymentRequest):
    """Create a CServe deployment by posting ``request`` to the CServe V3 create endpoint."""
    submit = self._api.create_cserve_v3_deployment_deployments_cserve_v3_post
    return submit(request)
69
90
70
- def update_inference (self , deployment_id : int , request : CreateInferenceDeploymentRequest ):
71
- return self ._api .update_inference_deployment_deployments_inference_put (deployment_id , request )
91
def detect_inference_deployment_version(self, deployment_id: int) -> str:
    """Detect whether an inference deployment is V2 or V3 by probing both getters.

    The V3 getter is called first; if it succeeds the deployment is reported
    as ``'v3'``. If it raises ``ApiException`` with status 404 or 400, the
    V2 getter is tried and, on success, ``'v2'`` is returned.

    Args:
        deployment_id: Identifier passed through to the API client getters.

    Returns:
        ``'v3'`` or ``'v2'``.

    Raises:
        ApiException: The V3 probe failed with a status other than 404/400.
        ValueError: The V3 probe failed with 404/400 and the V2 probe raised
            ``ApiException`` as well. The V2 error is attached as
            ``__cause__``.
    """
    try:
        self._api.get_inference_v3_deployment_deployments_inference_v3_deployment_id_get(deployment_id)
    except ApiException as v3_error:
        # Anything other than 404/400 (auth, network, etc.) is not a version
        # signal, so it is re-raised unchanged.
        if v3_error.status not in [404, 400]:
            raise
        try:
            self._api.get_inference_deployment_deployments_inference_deployment_id_get(deployment_id)
        except ApiException as v2_error:
            # Both probes failed. Chain explicitly so the underlying API error
            # is recorded as the cause of the ValueError.
            raise ValueError(
                f"Deployment {deployment_id} is not a valid inference deployment or does not exist"
            ) from v2_error
        return 'v2'
    return 'v3'
111
+
112
def update_inference(
    self, deployment_id: int, request: Union[CreateInferenceDeploymentRequest, CreateInferenceV3DeploymentRequest]
):
    """Update an inference deployment after checking the request matches its version.

    The request type is classified first, so an unsupported type is rejected
    before any API call is made. The deployment's version is then detected
    via ``detect_inference_deployment_version`` and compared with the version
    implied by the request type. Only on a match is the corresponding update
    endpoint called.

    Args:
        deployment_id: Identifier of the deployment to update.
        request: A ``CreateInferenceV3DeploymentRequest`` (for a V3
            deployment) or a ``CreateInferenceDeploymentRequest`` (for a V2
            deployment).

    Returns:
        The result of the V3 or V2 update endpoint of the API client.

    Raises:
        ValueError: ``request`` is of neither supported type, or its type
            does not match the detected deployment version. May also be
            raised by ``detect_inference_deployment_version``.
    """
    # Classify the request up front. V3 is tested first, preserving the
    # original precedence of the isinstance checks.
    if isinstance(request, CreateInferenceV3DeploymentRequest):
        request_version = 'v3'
    elif isinstance(request, CreateInferenceDeploymentRequest):
        request_version = 'v2'
    else:
        # Fail fast: no point probing the API for a request that can never be sent.
        raise ValueError(
            f"Unsupported request type: {type(request)} . Expected CreateInferenceDeploymentRequest or CreateInferenceV3DeploymentRequest."
        )

    # Detect the deployment version (this performs the API probe calls).
    deployment_version = self.detect_inference_deployment_version(deployment_id)

    if request_version == 'v3':
        if deployment_version != 'v3':
            raise ValueError(
                f"Deployment {deployment_id} is Inference {deployment_version.upper()} , but you provided a V3 request. Please use CreateInferenceDeploymentRequest instead."
            )
        return self._api.update_inference_v3_deployment_deployments_inference_v3_put(deployment_id, request)

    if deployment_version != 'v2':
        raise ValueError(
            f"Deployment {deployment_id} is Inference {deployment_version.upper()} , but you provided a V2 request. Please use CreateInferenceV3DeploymentRequest instead."
        )
    return self._api.update_inference_deployment_deployments_inference_put(deployment_id, request)
72
136
73
137
def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest):
    """Update the compute deployment ``deployment_id`` by sending ``request`` to the compute update endpoint."""
    submit = self._api.update_compute_deployment_deployments_compute_put
    return submit(deployment_id, request)
0 commit comments