|
9 | 9 | CreateComputeDeploymentRequest,
|
10 | 10 | CreateCServeV2DeploymentRequest,
|
11 | 11 | CreateCServeV3DeploymentRequest,
|
12 |
| - CServeV2Recipe, |
13 | 12 | ApiException,
|
14 | 13 | Metric,
|
15 | 14 | )
|
@@ -74,59 +73,50 @@ def update_inference(self, deployment_id: int, request: CreateInferenceDeploymen
|
74 | 73 | def update_compute(self, deployment_id: int, request: CreateComputeDeploymentRequest):
|
75 | 74 | return self._api.update_compute_deployment_deployments_compute_put(deployment_id, request)
|
76 | 75 |
|
| 76 | + def detect_deployment_version(self, deployment_id: int) -> str: |
| 77 | + """Detect if a deployment is V2 or V3 by testing the specific API endpoints""" |
| 78 | + try: |
| 79 | + # Try V3 endpoint first |
| 80 | + self._api.get_cserve_v3_deployment_deployments_cserve_v3_deployment_id_get(deployment_id) |
| 81 | + return 'v3' |
| 82 | + except ApiException as e: |
| 83 | + if e.status in [404, 400]: # V3 endpoint doesn't exist for this deployment |
| 84 | + try: |
| 85 | + # Try V2 endpoint |
| 86 | + self._api.get_cserve_v2_deployment_deployments_cserve_v2_deployment_id_get(deployment_id) |
| 87 | + return 'v2' |
| 88 | + except ApiException: |
| 89 | + # If both fail, it might not be a CServe deployment or doesn't exist |
| 90 | + raise ValueError(f"Deployment {deployment_id} is not a valid CServe deployment or does not exist") |
| 91 | + else: |
| 92 | + # Other error (auth, network, etc.) |
| 93 | + raise |
| 94 | + |
77 | 95 | def update_cserve(
|
78 | 96 | self, deployment_id: int, request: Union[CreateCServeV2DeploymentRequest, CreateCServeV3DeploymentRequest]
|
79 | 97 | ):
|
80 |
| - """Update CServe deployment - automatically handles both V2 and V3 deployments""" |
81 |
| - # Determine the approach based on the request type |
| 98 | + """Update CServe deployment - validates request type matches deployment version""" |
| 99 | + # Detect the deployment version |
| 100 | + deployment_version = self.detect_deployment_version(deployment_id) |
| 101 | + |
| 102 | + # Validate request type matches deployment version |
82 | 103 | if isinstance(request, CreateCServeV3DeploymentRequest):
|
83 |
| - # V3 request - try V3 API first, fallback if deployment is actually V2 |
84 |
| - try: |
85 |
| - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) |
86 |
| - except ApiException as e: |
87 |
| - if e.status in [404, 400]: # V3 API failed, deployment might be V2 |
88 |
| - # Convert V3 request to V2 and try V2 API |
89 |
| - v2_request = self._convert_v3_to_v2_request(request) |
90 |
| - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, v2_request) |
91 |
| - else: |
92 |
| - raise |
| 104 | + if deployment_version != 'v3': |
| 105 | + raise ValueError( |
| 106 | + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V3 request. Please use CreateCServeV2DeploymentRequest instead." |
| 107 | + ) |
| 108 | + return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, request) |
93 | 109 | elif isinstance(request, CreateCServeV2DeploymentRequest):
|
94 |
| - # V2 request - try V2 API first, fallback to V3 if deployment is actually V3 |
95 |
| - try: |
96 |
| - return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) |
97 |
| - except ApiException as e: |
98 |
| - if e.status in [404, 400]: # V2 API failed, deployment might be V3 |
99 |
| - # Convert V2 request to V3 and try V3 API |
100 |
| - v3_request = self.convert_v2_to_v3_request(request) |
101 |
| - return self._api.update_cserve_v3_deployment_deployments_cserve_v3_put(deployment_id, v3_request) |
102 |
| - else: |
103 |
| - raise |
| 110 | + if deployment_version != 'v2': |
| 111 | + raise ValueError( |
| 112 | + f"Deployment {deployment_id} is CServe {deployment_version.upper()}, but you provided a V2 request. Please use CreateCServeV3DeploymentRequest instead." |
| 113 | + ) |
| 114 | + return self._api.update_cserve_v2_deployment_deployments_cserve_v2_put(deployment_id, request) |
104 | 115 | else:
|
105 | 116 | raise ValueError(
|
106 | 117 | f"Unsupported request type: {type(request)}. Expected CreateCServeV2DeploymentRequest or CreateCServeV3DeploymentRequest."
|
107 | 118 | )
|
108 | 119 |
|
109 |
| - def _convert_v3_to_v2_request(self, v3_request: CreateCServeV3DeploymentRequest) -> CreateCServeV2DeploymentRequest: |
110 |
| - """Convert V3 request format to V2 format (reverse of convert_v2_to_v3_request)""" |
111 |
| - # Get all fields from V3 request |
112 |
| - kwargs = v3_request.model_dump() if hasattr(v3_request, 'model_dump') else v3_request.dict() |
113 |
| - |
114 |
| - # Remove old V3 field names |
115 |
| - min_replicas = kwargs.pop('min_replicas', None) |
116 |
| - max_replicas = kwargs.pop('max_replicas', None) |
117 |
| - initial_replicas = kwargs.pop('initial_replicas', None) |
118 |
| - # Remove V3-only fields |
119 |
| - kwargs.pop('max_surge', None) |
120 |
| - kwargs.pop('max_unavailable', None) |
121 |
| - |
122 |
| - # Add new V2 field names |
123 |
| - kwargs['min_scale'] = min_replicas |
124 |
| - kwargs['max_scale'] = max_replicas |
125 |
| - if initial_replicas is not None: |
126 |
| - kwargs['initial_scale'] = initial_replicas |
127 |
| - |
128 |
| - return CreateCServeV2DeploymentRequest(**kwargs) |
129 |
| - |
130 | 120 | def _update_status(self, id, new_status):
|
131 | 121 | status_req = platform_api_python_client.DeploymentStatusRequest(status=new_status)
|
132 | 122 | self._api.update_deployment_status_deployments_status_deployment_id_put(id, status_req)
|
@@ -181,28 +171,6 @@ def detect_cserve_deployment_version(self, deployment_response):
|
181 | 171 | # Default to V2 for backward compatibility
|
182 | 172 | return 'v2'
|
183 | 173 |
|
184 |
| - def convert_v2_to_v3_request(self, v2_request: CreateCServeV2DeploymentRequest) -> CreateCServeV3DeploymentRequest: |
185 |
| - """Convert V2 request format to V3 format with field mapping""" |
186 |
| - # Get all fields from V2 request |
187 |
| - kwargs = v2_request.model_dump() if hasattr(v2_request, 'model_dump') else v2_request.dict() |
188 |
| - |
189 |
| - # Remove old V2 field names |
190 |
| - min_scale = kwargs.pop('min_scale', None) |
191 |
| - max_scale = kwargs.pop('max_scale', None) |
192 |
| - initial_scale = kwargs.pop('initial_scale', None) |
193 |
| - |
194 |
| - # Add new V3 field names |
195 |
| - kwargs['min_replicas'] = min_scale |
196 |
| - kwargs['max_replicas'] = max_scale |
197 |
| - if initial_scale is not None: |
198 |
| - kwargs['initial_replicas'] = initial_scale |
199 |
| - |
200 |
| - # Add V3-specific fields |
201 |
| - kwargs['max_surge'] = None |
202 |
| - kwargs['max_unavailable'] = None |
203 |
| - |
204 |
| - return CreateCServeV3DeploymentRequest(**kwargs) |
205 |
| - |
206 | 174 | # pylint: disable=R0917
|
207 | 175 | def get_deployment_usage(
|
208 | 176 | self, id: int, metric: Metric, start_time_in_seconds: int, end_time_in_seconds: int, step: int
|
|
0 commit comments