Refactor workspace_client_mock to combine fixtures stored in separate JSON files #955

Merged
merged 19 commits on Feb 19, 2024
34 changes: 19 additions & 15 deletions src/databricks/labs/ucx/assessment/jobs.py
@@ -57,6 +57,24 @@
        return self._assess_jobs(all_jobs, all_clusters)

    def _assess_jobs(self, all_jobs: list[BaseJob], all_clusters_by_id) -> Iterable[JobInfo]:
        job_assessment, job_details = self._prepare(all_jobs)
        for job, cluster_config in self._get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
            job_id = job.job_id
            if not job_id:
                continue
            cluster_details = ClusterDetails.from_dict(cluster_config.as_dict())
            cluster_failures = self.check_cluster_failures(cluster_details, "Job cluster")
            job_assessment[job_id].update(cluster_failures)

        # TODO: next person looking at this - rewrite, as this code makes no sense
        for job_key in job_details.keys():  # pylint: disable=consider-using-dict-items,consider-iterating-dictionary
            job_details[job_key].failures = json.dumps(list(job_assessment[job_key]))
            if len(job_assessment[job_key]) > 0:
                job_details[job_key].success = 0
        return list(job_details.values())

    @staticmethod
    def _prepare(all_jobs):
        job_assessment: dict[int, set[str]] = {}
        job_details: dict[int, JobInfo] = {}
        for job in all_jobs:
@@ -82,21 +100,7 @@
                success=1,
                failures="[]",
            )

        for job, cluster_config in self._get_cluster_configs_from_all_jobs(all_jobs, all_clusters_by_id):
            job_id = job.job_id
            if not job_id:
                continue
            cluster_details = ClusterDetails.from_dict(cluster_config.as_dict())
            cluster_failures = self.check_cluster_failures(cluster_details, "Job cluster")
            job_assessment[job_id].update(cluster_failures)

        # TODO: next person looking at this - rewrite, as this code makes no sense
        for job_key in job_details.keys():  # pylint: disable=consider-using-dict-items,consider-iterating-dictionary
            job_details[job_key].failures = json.dumps(list(job_assessment[job_key]))
            if len(job_assessment[job_key]) > 0:
                job_details[job_key].success = 0
        return list(job_details.values())
        return job_assessment, job_details

    def snapshot(self) -> Iterable[JobInfo]:
        return self._snapshot(self._try_fetch, self._crawl)
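For readers untangling the TODO above, here is a minimal sketch of the aggregation that _prepare and _assess_jobs implement together (illustrative data only, not code from this PR, with plain dicts standing in for the JobInfo rows): every job starts as success=1 with an empty failure set, and any cluster failure recorded against its job_id flips it to success=0 with the failures serialized to JSON.

import json

job_assessment = {42: set()}                           # job_id -> set of failure messages (built by _prepare)
job_details = {42: {"success": 1, "failures": "[]"}}   # job_id -> assessment row (built by _prepare)

# a hypothetical failure found while checking the job's cluster config
job_assessment[42].add("unsupported config: spark.databricks.passthrough.enabled")

for job_key in job_details:
    job_details[job_key]["failures"] = json.dumps(sorted(job_assessment[job_key]))
    if job_assessment[job_key]:
        job_details[job_key]["success"] = 0

print(job_details[42])
# {'success': 0, 'failures': '["unsupported config: spark.databricks.passthrough.enabled"]'}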
45 changes: 33 additions & 12 deletions tests/unit/assessment/__init__.py
@@ -15,8 +15,11 @@


def _base64(filename: str):
    with (__dir / filename).open("rb") as f:
        return base64.b64encode(f.read())
    try:
        with (__dir / filename).open("rb") as f:
            return base64.b64encode(f.read())
    except FileNotFoundError as err:
        raise NotFound(filename) from err


def _workspace_export(filename: str):
@@ -25,12 +28,30 @@ def _workspace_export(filename: str):


def _load_fixture(filename: str):
    with (__dir / filename).open("r") as f:
        return json.load(f)
    try:
        with (__dir / filename).open("r") as f:
            return json.load(f)
    except FileNotFoundError as err:
        raise NotFound(filename) from err


def _load_list(cls: type, filename: str):
    return [cls.from_dict(_) for _ in _load_fixture(filename)]  # type: ignore[attr-defined]
_FOLDERS = {
    BaseJob: '../assessment/jobs',
    ClusterDetails: '../assessment/clusters',
    PipelineStateInfo: '../assessment/pipelines',
}


def _load_list(cls: type, filename: str, ids=None):
    if not ids:  # TODO: remove
        return [cls.from_dict(_) for _ in _load_fixture(filename)]  # type: ignore[attr-defined]
    return _id_list(cls, ids)


def _id_list(cls: type, ids=None):
    if not ids:
        return []
    return [cls.from_dict(_load_fixture(f'{_FOLDERS[cls]}/{_}.json')) for _ in ids]  # type: ignore[attr-defined]


def _cluster_policy(policy_id: str):
@@ -51,18 +72,18 @@ def _secret_not_found(secret_scope, _):


def workspace_client_mock(
    clusters="no-spark-conf.json",
    pipelines="single-pipeline.json",
    jobs="single-job.json",
    cluster_ids: list[str] | None = None,
    pipeline_ids: list[str] | None = None,
    job_ids: list[str] | None = None,
    warehouse_config="single-config.json",
    secret_exists=True,
):
    ws = create_autospec(WorkspaceClient)
    ws.clusters.list.return_value = _load_list(ClusterDetails, f"../assessment/clusters/{clusters}")
    ws.clusters.list.return_value = _id_list(ClusterDetails, cluster_ids)
    ws.cluster_policies.get = _cluster_policy
    ws.pipelines.list_pipelines.return_value = _load_list(PipelineStateInfo, f"../assessment/pipelines/{pipelines}")
    ws.pipelines.list_pipelines.return_value = _id_list(PipelineStateInfo, pipeline_ids)
    ws.pipelines.get = _pipeline
    ws.jobs.list.return_value = _load_list(BaseJob, f"../assessment/jobs/{jobs}")
    ws.jobs.list.return_value = _id_list(BaseJob, job_ids)
    ws.warehouses.get_workspace_warehouse_config().data_access_config = _load_list(
        EndpointConfPair, f"../assessment/warehouses/{warehouse_config}"
    )
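As a usage note, here is a minimal sketch (not a test from this PR) of how a unit test can drive the refactored mock; the cluster ids refer to fixture files added under tests/unit/assessment/clusters/ in this change, and the asserted names come from those fixtures.

from databricks.sdk.service.compute import ClusterDetails


def test_mock_lists_fixture_backed_clusters():
    ws = workspace_client_mock(cluster_ids=['azure-spn-secret', 'no-isolation'])
    clusters = ws.clusters.list()
    assert {c.cluster_name for c in clusters} == {"Azure SPN Secret", "No isolation shared"}
    assert all(isinstance(c, ClusterDetails) for c in clusters)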
64 changes: 0 additions & 64 deletions tests/unit/assessment/clusters/assortment-conf.json

This file was deleted.

29 changes: 0 additions & 29 deletions tests/unit/assessment/clusters/assortment-spn.json

This file was deleted.

16 changes: 16 additions & 0 deletions tests/unit/assessment/clusters/azure-spn-secret.json
@@ -0,0 +1,16 @@
{
    "autoscale": {
        "min_workers": 1,
        "max_workers": 6
    },
    "cluster_id": "azure-spn-secret",
    "cluster_name": "Azure SPN Secret",
    "cluster_source": "UI",
    "spark_conf": {
        "spark.hadoop.fs.azure.account.oauth2.client.id.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_app_client_id}}",
        "spark.hadoop.fs.azure.account.oauth2.client.endpoint.abcde.dfs.core.windows.net": "https://login.microsoftonline.com/dedededede/oauth2/token",
        "spark.hadoop.fs.azure.account.oauth2.client.secret.abcde.dfs.core.windows.net": "{{secrets/abcff/sp_secret}}"
    },
    "spark_context_id": 5134472582179565315,
    "spark_version": "13.3.x-cpu-ml-scala2.12"
}
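A hedged sketch of how this fixture is resolved by the helpers shown earlier: _id_list looks the id up through the _FOLDERS mapping, loads the JSON, and hands it to the SDK's ClusterDetails.from_dict, so the spark_conf entries above are what assessments looking for Azure SPN configuration would inspect. (Illustrative only; assumes it runs where _id_list and _FOLDERS are importable.)

from databricks.sdk.service.compute import ClusterDetails

details = _id_list(ClusterDetails, ['azure-spn-secret'])[0]
assert details.cluster_id == "azure-spn-secret"
assert "{{secrets/abcff/sp_secret}}" in details.spark_conf.values()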
34 changes: 0 additions & 34 deletions tests/unit/assessment/clusters/dbfs-init-scripts.json

This file was deleted.

32 changes: 32 additions & 0 deletions tests/unit/assessment/clusters/init-scripts-dbfs.json
@@ -0,0 +1,32 @@
{
    "autoscale": {
        "max_workers": 6,
        "min_workers": 1
    },
    "cluster_id": "01234-11223344-1122334455",
    "cluster_name": "UCX Cluster",
    "policy_id": "single-user-with-spn",
    "spark_version": "13.3.x-cpu-ml-scala2.12",
    "init_scripts": [
        {
            "dbfs": {
                "destination": ":/users/test@test.com/init_scripts/test.sh"
            }
        },
        {
            "dbfs": {
                "destination": "dbfs"
            }
        },
        {
            "dbfs": {
                "destination": "dbfs:"
            }
        },
        {
            "workspace": {
                "destination": "init.sh"
            }
        }
    ]
}
16 changes: 16 additions & 0 deletions tests/unit/assessment/clusters/job-cluster.json
@@ -0,0 +1,16 @@
{
    "autoscale": {
        "max_workers": 6,
        "min_workers": 1
    },
    "cluster_source": "JOB",
    "creator_user_name": "fake@fake.org",
    "cluster_id": "0123-190044-1122334422",
    "cluster_name": "Single User Cluster Name",
    "policy_id": "single-user-with-spn",
    "spark_version": "9.3.x-cpu-ml-scala2.12",
    "spark_conf": {
        "spark.databricks.delta.preview.enabled": "true"
    },
    "spark_context_id": "5134472582179565315"
}
33 changes: 0 additions & 33 deletions tests/unit/assessment/clusters/job-source-cluster.json

This file was deleted.

5 changes: 5 additions & 0 deletions tests/unit/assessment/clusters/legacy-passthrough.json
@@ -0,0 +1,5 @@
{
    "cluster_name": "Passthrough cluster",
    "spark_version": "12.3.x-cpu-ml-scala2.12",
    "data_security_mode": "LEGACY_PASSTHROUGH"
}
15 changes: 0 additions & 15 deletions tests/unit/assessment/clusters/multiple-failures-conf.json

This file was deleted.

5 changes: 5 additions & 0 deletions tests/unit/assessment/clusters/no-isolation.json
@@ -0,0 +1,5 @@
{
    "cluster_name": "No isolation shared",
    "spark_version": "12.3.x-cpu-ml-scala2.12",
    "data_security_mode": "NONE"
}