From 45a356ca24ce92c80f06c3f60f9969aaf25987e4 Mon Sep 17 00:00:00 2001 From: Miles Yucht Date: Wed, 3 Apr 2024 14:56:58 +0200 Subject: [PATCH] Override INVALID_PARAMETER_VALUE on fetching non-existent job/cluster (#591) ## Changes Port of https://github.com/databricks/databricks-sdk-go/pull/864 to the Python SDK. Most services use RESOURCE_DOES_NOT_EXIST error code with 404 status code to indicate that a resource doesn't exist. However, for legacy reasons, Jobs and Clusters services use INVALID_PARAMETER_VALUE error code with 400 status code instead. This makes tools like Terraform and UCX difficult to maintain, as these services need different error handling logic. However, we can't change these behaviors as customers already depend on the raw HTTP response status & contents. This PR corrects these errors in the SDK itself. SDK users can then do ``` try: w.jobs.get_by_id('abc') except ResourceDoesNotExist: pass ``` just as you would expect from other resources. Updated the README with more information about this as well. ## Tests - [ ] `make test` run locally - [ ] `make fmt` applied - [ ] relevant integration tests applied --- .codegen.json | 3 +- .codegen/error_overrides.py.tmpl | 20 +++++++++++++ .gitattributes | 1 + README.md | 18 ++++++++++++ databricks/sdk/core.py | 2 +- databricks/sdk/errors/base.py | 46 +++++++++++++++++++++++++++++- databricks/sdk/errors/mapper.py | 10 ++++++- databricks/sdk/errors/overrides.py | 25 ++++++++++++++++ tests/test_errors.py | 46 ++++++++++++++++++++++++++---- 9 files changed, 162 insertions(+), 9 deletions(-) create mode 100644 .codegen/error_overrides.py.tmpl create mode 100644 databricks/sdk/errors/overrides.py diff --git a/.codegen.json b/.codegen.json index c190dc41..87443e2c 100644 --- a/.codegen.json +++ b/.codegen.json @@ -8,7 +8,8 @@ }, "batch": { ".codegen/__init__.py.tmpl": "databricks/sdk/__init__.py", - ".codegen/error_mapping.py.tmpl": "databricks/sdk/errors/platform.py" + ".codegen/error_mapping.py.tmpl": "databricks/sdk/errors/platform.py", + ".codegen/error_overrides.py.tmpl": "databricks/sdk/errors/overrides.py" }, "samples": { ".codegen/example.py.tmpl": "examples/{{.Service.SnakeName}}/{{.Method.SnakeName}}_{{.SnakeName}}.py" diff --git a/.codegen/error_overrides.py.tmpl b/.codegen/error_overrides.py.tmpl new file mode 100644 index 00000000..6bb85d6c --- /dev/null +++ b/.codegen/error_overrides.py.tmpl @@ -0,0 +1,20 @@ +# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +from .base import _ErrorOverride +from .platform import * +import re + + +_ALL_OVERRIDES = [ + {{ range .ErrorOverrides -}} + _ErrorOverride( + debug_name="{{.Name}}", + path_regex=re.compile(r'{{.PathRegex}}'), + verb="{{.Verb}}", + status_code_matcher=re.compile(r'{{.StatusCodeMatcher}}'), + error_code_matcher=re.compile(r'{{.ErrorCodeMatcher}}'), + message_matcher=re.compile(r'{{.MessageMatcher}}'), + custom_error={{.OverrideErrorCode.PascalName}}, + ), +{{- end }} +] diff --git a/.gitattributes b/.gitattributes index d36f22ee..3b4f3ee3 100755 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ databricks/sdk/__init__.py linguist-generated=true +databricks/sdk/errors/overrides.py linguist-generated=true databricks/sdk/errors/platform.py linguist-generated=true databricks/sdk/service/billing.py linguist-generated=true databricks/sdk/service/catalog.py linguist-generated=true diff --git a/README.md b/README.md index 17c32398..dec52c8e 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ The SDK's internal HTTP client is robust and handles failures on different level - [Long-running operations](#long-running-operations) - [Paginated responses](#paginated-responses) - [Single-sign-on with OAuth](#single-sign-on-sso-with-oauth) +- [Error handling](#error-handling) - [Logging](#logging) - [Integration with `dbutils`](#interaction-with-dbutils) - [Interface stability](#interface-stability) @@ -507,6 +508,23 @@ logging.info(f'Created new custom app: ' f'--client_secret {custom_app.client_secret}') ``` +## Error handling + +The Databricks SDK for Python provides a robust error-handling mechanism that allows developers to catch and handle API errors. When an error occurs, the SDK will raise an exception that contains information about the error, such as the HTTP status code, error message, and error details. Developers can catch these exceptions and handle them appropriately in their code. + +```python +from databricks.sdk import WorkspaceClient +from databricks.sdk.errors import ResourceDoesNotExist + +w = WorkspaceClient() +try: + w.clusters.get(cluster_id='1234-5678-9012') +except ResourceDoesNotExist as e: + print(f'Cluster not found: {e}') +``` + +The SDK handles inconsistencies in error responses amongst the different services, providing a consistent interface for developers to work with. Simply catch the appropriate exception type and handle the error as needed. The errors returned by the Databricks API are defined in [databricks/sdk/errors/platform.py](https://github.com/databricks/databricks-sdk-py/blob/main/databricks/sdk/errors/platform.py). + ## Logging The Databricks SDK for Python seamlessly integrates with the standard [Logging facility for Python](https://docs.python.org/3/library/logging.html). diff --git a/databricks/sdk/core.py b/databricks/sdk/core.py index 789200b2..bbb45310 100644 --- a/databricks/sdk/core.py +++ b/databricks/sdk/core.py @@ -264,7 +264,7 @@ def _make_nicer_error(self, *, response: requests.Response, **kwargs) -> Databri if is_too_many_requests_or_unavailable: kwargs['retry_after_secs'] = self._parse_retry_after(response) kwargs['message'] = message - return error_mapper(status_code, kwargs) + return error_mapper(response, kwargs) def _record_request_log(self, response: requests.Response, raw=False): if not logger.isEnabledFor(logging.DEBUG): diff --git a/databricks/sdk/errors/base.py b/databricks/sdk/errors/base.py index d03b0d8c..89be376b 100644 --- a/databricks/sdk/errors/base.py +++ b/databricks/sdk/errors/base.py @@ -1,4 +1,8 @@ -from typing import Dict, List +import re +from dataclasses import dataclass +from typing import Dict, List, Optional + +import requests class ErrorDetail: @@ -63,3 +67,43 @@ def _get_details_by_type(self, error_type) -> List[ErrorDetail]: if self.details == None: return [] return [detail for detail in self.details if detail.type == error_type] + + +@dataclass +class _ErrorOverride: + # The name of the override. Used for logging purposes. + debug_name: str + + # A regex that must match the path of the request for this override to be applied. + path_regex: re.Pattern + + # The HTTP method of the request for the override to apply + verb: str + + # The custom error class to use for this override. + custom_error: type + + # A regular expression that must match the error code for this override to be applied. If None, + # this field is ignored. + status_code_matcher: Optional[re.Pattern] = None + + # A regular expression that must match the error code for this override to be applied. If None, + # this field is ignored. + error_code_matcher: Optional[re.Pattern] = None + + # A regular expression that must match the message for this override to be applied. If None, + # this field is ignored. + message_matcher: Optional[re.Pattern] = None + + def matches(self, response: requests.Response, raw_error: dict): + if response.request.method != self.verb: + return False + if not self.path_regex.match(response.request.path_url): + return False + if self.status_code_matcher and not self.status_code_matcher.match(str(response.status_code)): + return False + if self.error_code_matcher and not self.error_code_matcher.match(raw_error.get('error_code', '')): + return False + if self.message_matcher and not self.message_matcher.match(raw_error.get('message', '')): + return False + return True diff --git a/databricks/sdk/errors/mapper.py b/databricks/sdk/errors/mapper.py index 225be818..c4ba0d55 100644 --- a/databricks/sdk/errors/mapper.py +++ b/databricks/sdk/errors/mapper.py @@ -1,8 +1,16 @@ +import requests + from databricks.sdk.errors import platform from databricks.sdk.errors.base import DatabricksError +from .overrides import _ALL_OVERRIDES + -def error_mapper(status_code: int, raw: dict) -> DatabricksError: +def error_mapper(response: requests.Response, raw: dict) -> DatabricksError: + for override in _ALL_OVERRIDES: + if override.matches(response, raw): + return override.custom_error(**raw) + status_code = response.status_code error_code = raw.get('error_code', None) if error_code in platform.ERROR_CODE_MAPPING: # more specific error codes override more generic HTTP status codes diff --git a/databricks/sdk/errors/overrides.py b/databricks/sdk/errors/overrides.py new file mode 100644 index 00000000..492b2caa --- /dev/null +++ b/databricks/sdk/errors/overrides.py @@ -0,0 +1,25 @@ +# Code generated from OpenAPI specs by Databricks SDK Generator. DO NOT EDIT. + +import re + +from .base import _ErrorOverride +from .platform import ResourceDoesNotExist + +_ALL_OVERRIDES = [ + _ErrorOverride(debug_name="Clusters InvalidParameterValue=>ResourceDoesNotExist", + path_regex=re.compile(r'^/api/2\.\d/clusters/get'), + verb="GET", + status_code_matcher=re.compile(r'^400$'), + error_code_matcher=re.compile(r'INVALID_PARAMETER_VALUE'), + message_matcher=re.compile(r'Cluster .* does not exist'), + custom_error=ResourceDoesNotExist, + ), + _ErrorOverride(debug_name="Jobs InvalidParameterValue=>ResourceDoesNotExist", + path_regex=re.compile(r'^/api/2\.\d/jobs/get'), + verb="GET", + status_code_matcher=re.compile(r'^400$'), + error_code_matcher=re.compile(r'INVALID_PARAMETER_VALUE'), + message_matcher=re.compile(r'Job .* does not exist'), + custom_error=ResourceDoesNotExist, + ), +] diff --git a/tests/test_errors.py b/tests/test_errors.py index 2b7cafa6..d9243615 100644 --- a/tests/test_errors.py +++ b/tests/test_errors.py @@ -1,25 +1,36 @@ import pytest +import requests from databricks.sdk import errors +def fake_response(status_code: int) -> requests.Response: + resp = requests.Response() + resp.status_code = status_code + resp.request = requests.Request('GET', 'https://databricks.com/api/2.0/service').prepare() + return resp + + def test_error_code_has_precedence_over_http_status(): - err = errors.error_mapper(400, {'error_code': 'INVALID_PARAMETER_VALUE', 'message': 'nope'}) + err = errors.error_mapper(fake_response(400), { + 'error_code': 'INVALID_PARAMETER_VALUE', + 'message': 'nope' + }) assert errors.InvalidParameterValue == type(err) def test_http_status_code_maps_fine(): - err = errors.error_mapper(400, {'error_code': 'MALFORMED_REQUEST', 'message': 'nope'}) + err = errors.error_mapper(fake_response(400), {'error_code': 'MALFORMED_REQUEST', 'message': 'nope'}) assert errors.BadRequest == type(err) def test_other_errors_also_map_fine(): - err = errors.error_mapper(417, {'error_code': 'WHOOPS', 'message': 'nope'}) + err = errors.error_mapper(fake_response(417), {'error_code': 'WHOOPS', 'message': 'nope'}) assert errors.DatabricksError == type(err) def test_missing_error_code(): - err = errors.error_mapper(522, {'message': 'nope'}) + err = errors.error_mapper(fake_response(522), {'message': 'nope'}) assert errors.DatabricksError == type(err) @@ -48,6 +59,31 @@ def test_missing_error_code(): (444, ..., errors.DatabricksError), (444, ..., IOError), ]) def test_subclasses(status_code, error_code, klass): try: - raise errors.error_mapper(status_code, {'error_code': error_code, 'message': 'nope'}) + raise errors.error_mapper(fake_response(status_code), {'error_code': error_code, 'message': 'nope'}) except klass: return + + +@pytest.mark.parametrize('verb, path, status_code, error_code, message, expected_error', + [[ + 'GET', '/api/2.0/clusters/get', 400, 'INVALID_PARAMETER_VALUE', + 'Cluster abcde does not exist', errors.ResourceDoesNotExist + ], + [ + 'GET', '/api/2.0/jobs/get', 400, 'INVALID_PARAMETER_VALUE', + 'Job abcde does not exist', errors.ResourceDoesNotExist + ], + [ + 'GET', '/api/2.1/jobs/get', 400, 'INVALID_PARAMETER_VALUE', + 'Job abcde does not exist', errors.ResourceDoesNotExist + ], + [ + 'GET', '/api/2.1/jobs/get', 400, 'INVALID_PARAMETER_VALUE', + 'Invalid spark version', errors.InvalidParameterValue + ], ]) +def test_error_overrides(verb, path, status_code, error_code, message, expected_error): + resp = requests.Response() + resp.status_code = status_code + resp.request = requests.Request(verb, f'https://databricks.com{path}').prepare() + with pytest.raises(expected_error): + raise errors.error_mapper(resp, {'error_code': error_code, 'message': message})