From d336eb55606571d58759a8d6fae491ef3a2f97fc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:09:21 +0200 Subject: [PATCH 01/43] Add pip installer --- src/databricks/labs/ucx/source_code/graph.py | 14 ++++++++ .../labs/ucx/source_code/site_packages.py | 36 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index efeff2391b..154292d80f 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -199,6 +199,20 @@ def __repr__(self): return f"" +class BaseLibraryInstaller(abc.ABC): + """ + A library installer makes libraries available by installing them. + + Installation is a pre-requisite for libraries that are not available on the system (yet). After installation, + they can be registered during dependency graph building. + """ + + @abc.abstractmethod + def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: + """Install a library and augment path look-up so that it is able to resolve the library.""" + raise NotImplementedError() + + class Dependency(abc.ABC): def __init__(self, loader: DependencyLoader, path: Path): diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index f3fba67d87..65390fe8dd 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -1,7 +1,43 @@ from __future__ import annotations import os +import subprocess +import tempfile +from functools import cached_property from pathlib import Path +from subprocess import CalledProcessError + +from databricks.labs.ucx.source_code.graph import BaseLibraryInstaller, DependencyProblem +from databricks.labs.ucx.source_code.path_lookup import PathLookup + + +class PipInstaller(BaseLibraryInstaller): + # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies + # TODO: https://github.com/databrickslabs/ucx/issues/1642 + # TODO: https://github.com/databrickslabs/ucx/issues/1643 + # TODO: https://github.com/databrickslabs/ucx/issues/1640 + + def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: + """Pip install library and augment path look-up so that is able to resolve the library""" + # invoke pip install via subprocess to install this library into lib_install_folder + pip_install_arguments = ["pip", "install", library, "-t", self._temporary_virtual_environment.as_posix()] + try: + subprocess.run(pip_install_arguments, check=True) + except CalledProcessError as e: + return [DependencyProblem("library-install-failed", f"Failed to install {library}: {e}")] + + path_lookup.append_path(self._temporary_virtual_environment) + return [] + + @cached_property + def _temporary_virtual_environment(self) -> Path: + # TODO: for `databricks labs ucx lint-local-code`, detect if we already have a virtual environment + # and use that one. See Databricks CLI code for the labs command to see how to detect the virtual + # environment. If we don't have a virtual environment, create a temporary one. + # simulate notebook-scoped virtual environment + lib_install_folder = tempfile.mkdtemp(prefix="ucx-") + return Path(lib_install_folder) + COMMENTED_OUT_FOR_PR_1685 = """ class SitePackageContainer(SourceContainer): From c13c1e60f2448e1b299c2a90d6c7f58aad42fffc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:11:43 +0200 Subject: [PATCH 02/43] Add pip installer to context --- src/databricks/labs/ucx/contexts/application.py | 6 +++++- tests/integration/contexts/test_global_context.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index f2fc31913b..3f6e63f447 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -43,7 +43,7 @@ from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.graph import DependencyResolver from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist -from databricks.labs.ucx.source_code.site_packages import SitePackages +from databricks.labs.ucx.source_code.site_packages import PipInstaller, SitePackages from databricks.labs.ucx.source_code.languages import Languages from databricks.labs.ucx.source_code.redash import Redash from databricks.labs.ucx.workspace_access import generic, redash @@ -350,6 +350,10 @@ def workspace_info(self): def verify_has_metastore(self): return VerifyHasMetastore(self.workspace_client) + @cached_property + def pip_installer(self): + return PipInstaller() + @cached_property def notebook_loader(self) -> NotebookLoader: return NotebookLoader() diff --git a/tests/integration/contexts/test_global_context.py b/tests/integration/contexts/test_global_context.py index 99c2c2fb5b..2a422d5608 100644 --- a/tests/integration/contexts/test_global_context.py +++ b/tests/integration/contexts/test_global_context.py @@ -7,3 +7,4 @@ def test_cached_properties(): assert ctx.site_packages is not None assert ctx.site_packages_path is not None assert ctx.dependency_resolver is not None + assert ctx.dependency_resolver is not None \ No newline at end of file From 66d0ae33f4e8558944be483770dd35e41d7b5547 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:25:33 +0200 Subject: [PATCH 03/43] Add installer to dependency graph --- src/databricks/labs/ucx/source_code/graph.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 154292d80f..dce4b4cdbf 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -24,11 +24,13 @@ def __init__( self, dependency: Dependency, parent: DependencyGraph | None, + installer: BaseLibraryInstaller | None, resolver: DependencyResolver, path_lookup: PathLookup, ): self._dependency = dependency self._parent = parent + self._installer = installer self._resolver = resolver self._path_lookup = path_lookup.change_directory(dependency.path.parent) self._dependencies: dict[Dependency, DependencyGraph] = {} @@ -45,6 +47,11 @@ def dependency(self): def path(self): return self._dependency.path + def install_library(self, library: str) -> list[DependencyProblem]: + """Install a library and augment path look-up so that it is able to resolve the library.""" + assert self._installer is not None + return self._installer.install_library(self._path_lookup, library) + def register_library(self, name: str) -> MaybeGraph: # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies # TODO: https://github.com/databrickslabs/ucx/issues/1642 @@ -77,7 +84,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph: self._dependencies[dependency] = maybe.graph return maybe # nay, create the child graph and populate it - child_graph = DependencyGraph(dependency, self, self._resolver, self._path_lookup) + child_graph = DependencyGraph(dependency, self, self._installer, self._resolver, self._path_lookup) self._dependencies[dependency] = child_graph container = dependency.load(self.path_lookup) if not container: @@ -362,7 +369,7 @@ def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: maybe = resolver.resolve_local_file(self._path_lookup, path) if not maybe.dependency: return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) - graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) + graph = DependencyGraph(maybe.dependency, None, None, self, self._path_lookup) container = maybe.dependency.load(graph.path_lookup) if container is None: problem = DependencyProblem('cannot-load-file', f"Could not load file {path}") @@ -387,7 +394,7 @@ def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph: maybe = self._notebook_resolver.resolve_notebook(self._path_lookup, path) if not maybe.dependency: return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) - graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) + graph = DependencyGraph(maybe.dependency, None, None, self, self._path_lookup) container = maybe.dependency.load(graph.path_lookup) if container is None: problem = DependencyProblem('cannot-load-notebook', f"Could not load notebook {path}") From 7bc69b5c7ebccd5d7b0b0cb5358a86a5e703ab01 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:26:14 +0200 Subject: [PATCH 04/43] Pass installer form context to workflow linter to dependency graph --- src/databricks/labs/ucx/contexts/application.py | 1 + src/databricks/labs/ucx/source_code/jobs.py | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 3f6e63f447..c6b2648cb7 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -402,6 +402,7 @@ def dependency_resolver(self): def workflow_linter(self): return WorkflowLinter( self.workspace_client, + self.pip_installer, self.dependency_resolver, self.path_lookup, MigrationIndex([]), # TODO: bring back self.tables_migrator.index() diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index ef161af0ad..a69bc0b0e7 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -162,11 +162,13 @@ class WorkflowLinter: def __init__( self, ws: WorkspaceClient, + installer: BaseLibraryInstaller, resolver: DependencyResolver, path_lookup: PathLookup, migration_index: MigrationIndex, ): self._ws = ws + self._installer = installer self._resolver = resolver self._path_lookup = path_lookup self._migration_index = migration_index @@ -216,7 +218,7 @@ def _lint_job(self, job: jobs.Job) -> list[JobProblem]: def _lint_task(self, task: jobs.Task, job: jobs.Job): dependency: Dependency = WorkflowTask(self._ws, task, job) - graph = DependencyGraph(dependency, None, self._resolver, self._path_lookup) + graph = DependencyGraph(dependency, None, None, self._resolver, self._path_lookup) container = dependency.load(self._path_lookup) assert container is not None # because we just created it problems = container.build_dependency_graph(graph) From 7e962ff6851f0b625a2eb3360f2830f500c2038f Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:26:31 +0200 Subject: [PATCH 05/43] Use installer in job task --- src/databricks/labs/ucx/source_code/jobs.py | 65 ++++++++++--------- .../contexts/test_global_context.py | 2 +- 2 files changed, 37 insertions(+), 30 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index a69bc0b0e7..04ca103124 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -15,6 +15,7 @@ from databricks.labs.ucx.source_code.base import CurrentSessionState from databricks.labs.ucx.source_code.files import LocalFile from databricks.labs.ucx.source_code.graph import ( + BaseLibraryInstaller, DependencyGraph, SourceContainer, DependencyProblem, @@ -63,50 +64,63 @@ def __init__(self, ws: WorkspaceClient, task: jobs.Task): self._ws = ws def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: + installation_problems = list(self._install_libraries(parent)) + if len(installation_problems) > 0: + return installation_problems return list(self._register_task_dependencies(parent)) - def _register_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: - yield from self._register_libraries(graph) - yield from self._register_notebook(graph) - yield from self._register_spark_python_task(graph) - yield from self._register_python_wheel_task(graph) - yield from self._register_spark_jar_task(graph) - yield from self._register_run_job_task(graph) - yield from self._register_pipeline_task(graph) - yield from self._register_existing_cluster_id(graph) - yield from self._register_spark_submit_task(graph) + def _install_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: + yield from self._install_libraries(graph) + yield from self._install_cluster_libraries(graph) - def _register_libraries(self, graph: DependencyGraph): + def _install_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.libraries: return for library in self._task.libraries: - yield from self._lint_library(library, graph) + yield from self._install_library(graph, library) - def _lint_library(self, library: compute.Library, graph: DependencyGraph) -> Iterable[DependencyProblem]: + @staticmethod + def _install_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: if library.pypi: # TODO: https://github.com/databrickslabs/ucx/issues/1642 - maybe = graph.register_library(library.pypi.package) - if maybe.problems: - yield from maybe.problems + problems = graph.install_library(library.pypi.package) + if len(problems) > 0: + yield from problems if library.jar: # TODO: https://github.com/databrickslabs/ucx/issues/1641 yield DependencyProblem('not-yet-implemented', 'Jar library is not yet implemented') if library.egg: # TODO: https://github.com/databrickslabs/ucx/issues/1643 - maybe = graph.register_library(library.egg) - if maybe.problems: - yield from maybe.problems + problems = graph.install_library(library.egg) + if len(problems) > 0: + yield from problems if library.whl: # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path # TODO: https://github.com/databrickslabs/ucx/issues/1640 - maybe = graph.register_library(library.whl) - if maybe.problems: - yield from maybe.problems + problems = graph.install_library(library.whl) + if len(problems) > 0: + yield from problems if library.requirements: # TODO: download and add every entry via graph.register_library # TODO: https://github.com/databrickslabs/ucx/issues/1644 yield DependencyProblem('not-yet-implemented', 'Requirements library is not yet implemented') + def _install_cluster_libraries(self, graph: DependencyGraph): # pylint: disable=unused-argument + if not self._task.existing_cluster_id: + return + # TODO: https://github.com/databrickslabs/ucx/issues/1637 + # load libraries installed on the referred cluster + yield DependencyProblem('not-yet-implemented', 'Existing cluster id is not yet implemented') + + def _register_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: + yield from self._register_notebook(graph) + yield from self._register_spark_python_task(graph) + yield from self._register_python_wheel_task(graph) + yield from self._register_spark_jar_task(graph) + yield from self._register_run_job_task(graph) + yield from self._register_pipeline_task(graph) + yield from self._register_spark_submit_task(graph) + def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.notebook_task: return [] @@ -145,13 +159,6 @@ def _register_pipeline_task(self, graph: DependencyGraph): # pylint: disable=un # TODO: https://github.com/databrickslabs/ucx/issues/1638 yield DependencyProblem('not-yet-implemented', 'Pipeline task is not yet implemented') - def _register_existing_cluster_id(self, graph: DependencyGraph): # pylint: disable=unused-argument - if not self._task.existing_cluster_id: - return - # TODO: https://github.com/databrickslabs/ucx/issues/1637 - # load libraries installed on the referred cluster - yield DependencyProblem('not-yet-implemented', 'Existing cluster id is not yet implemented') - def _register_spark_submit_task(self, graph: DependencyGraph): # pylint: disable=unused-argument if not self._task.spark_submit_task: return diff --git a/tests/integration/contexts/test_global_context.py b/tests/integration/contexts/test_global_context.py index 2a422d5608..00cf2daced 100644 --- a/tests/integration/contexts/test_global_context.py +++ b/tests/integration/contexts/test_global_context.py @@ -7,4 +7,4 @@ def test_cached_properties(): assert ctx.site_packages is not None assert ctx.site_packages_path is not None assert ctx.dependency_resolver is not None - assert ctx.dependency_resolver is not None \ No newline at end of file + assert ctx.dependency_resolver is not None From 86b26e147646f53f3ae6868a271bb06633d2f636 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:28:05 +0200 Subject: [PATCH 06/43] Add TODO's --- src/databricks/labs/ucx/source_code/site_packages.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 65390fe8dd..51e4fa4095 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -78,6 +78,8 @@ def __init__(self, packages: list[SitePackage]): self._packages[top_level] = package def __getitem__(self, item: str) -> SitePackage | None: + # TODO: Replace hyphen with underscores + # TODO: Don't use get, raise KeyError return self._packages.get(item, None) From 51721274106232a6c065d26d5cc5117ad7598a5e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:29:15 +0200 Subject: [PATCH 07/43] Add pip installer tests --- tests/unit/source_code/test_site_packages.py | 26 ++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 1a5eaaaa51..97486ddfa0 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -1,8 +1,30 @@ -from databricks.labs.ucx.source_code.site_packages import SitePackages +from pathlib import Path + +from databricks.labs.ucx.source_code.site_packages import PipInstaller, SitePackages from tests.unit import locate_site_packages +def test_pip_installer_install_library(mock_path_lookup): + """Install and resolve pytest""" + pip_installer = PipInstaller() + problems = pip_installer.install_library(mock_path_lookup, "pytest") + + assert len(problems) == 0 + assert mock_path_lookup.resolve(Path("pytest")).exists() + + +def test_pip_installer_install_library_unknown_library(mock_path_lookup): + """Installing unknown library returns problem""" + pip_installer = PipInstaller() + problems = pip_installer.install_library(mock_path_lookup, "unknown-library-name") + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install unknown-library-name") + assert mock_path_lookup.resolve(Path("unknown-library-name")) is None + + def test_reads_site_packages(): site_packages_path = locate_site_packages() - site_packages = SitePackages.parse(site_packages_path) + site_packages = SitePackages.parse(str(site_packages_path)) assert site_packages["astroid"] is not None From 1efa14fc92e2098e342d561288839b8a52817b4b Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:39:21 +0200 Subject: [PATCH 08/43] Pass installer on dependency graph --- src/databricks/labs/ucx/source_code/jobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index 04ca103124..a7c81cfcfc 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -225,7 +225,7 @@ def _lint_job(self, job: jobs.Job) -> list[JobProblem]: def _lint_task(self, task: jobs.Task, job: jobs.Job): dependency: Dependency = WorkflowTask(self._ws, task, job) - graph = DependencyGraph(dependency, None, None, self._resolver, self._path_lookup) + graph = DependencyGraph(dependency, None, self._installer, self._resolver, self._path_lookup) container = dependency.load(self._path_lookup) assert container is not None # because we just created it problems = container.build_dependency_graph(graph) From 8871b80ec75e1d4724019b575bfb710f75fadfbe Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:40:20 +0200 Subject: [PATCH 09/43] Fix type --- tests/unit/source_code/test_site_packages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 97486ddfa0..8e1eb970a9 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -26,5 +26,5 @@ def test_pip_installer_install_library_unknown_library(mock_path_lookup): def test_reads_site_packages(): site_packages_path = locate_site_packages() - site_packages = SitePackages.parse(str(site_packages_path)) + site_packages = SitePackages.parse(site_packages_path) assert site_packages["astroid"] is not None From 420b91893a803c9d5f28259c599eb4bfd3b5cbc1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:45:48 +0200 Subject: [PATCH 10/43] Test pip installer on global context --- tests/integration/contexts/test_global_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/contexts/test_global_context.py b/tests/integration/contexts/test_global_context.py index 00cf2daced..ba54c1a442 100644 --- a/tests/integration/contexts/test_global_context.py +++ b/tests/integration/contexts/test_global_context.py @@ -7,4 +7,4 @@ def test_cached_properties(): assert ctx.site_packages is not None assert ctx.site_packages_path is not None assert ctx.dependency_resolver is not None - assert ctx.dependency_resolver is not None + assert ctx.pip_installer is not None From ba7eaed94acabda441d898ad03986c53e33c5bce Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 09:48:39 +0200 Subject: [PATCH 11/43] Remove raise NotImplementedError --- src/databricks/labs/ucx/source_code/graph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index dce4b4cdbf..715bce7292 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -217,7 +217,6 @@ class BaseLibraryInstaller(abc.ABC): @abc.abstractmethod def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: """Install a library and augment path look-up so that it is able to resolve the library.""" - raise NotImplementedError() class Dependency(abc.ABC): From 45e467c761e052f02d25d23d75118f670a30339e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 10:24:53 +0200 Subject: [PATCH 12/43] Add test for workflow task container --- tests/unit/source_code/test_jobs.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/unit/source_code/test_jobs.py diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py new file mode 100644 index 0000000000..66aa31b0df --- /dev/null +++ b/tests/unit/source_code/test_jobs.py @@ -0,0 +1,24 @@ +from pathlib import Path + +from databricks.sdk import WorkspaceClient +from databricks.sdk.service import jobs + +from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver +from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer + + +def test_workflow_task_container_build_dependency_graph_no_dependency_problems(mock_path_lookup): + """No dependency problems""" + ws = WorkspaceClient() + task = jobs.Task(task_key="test") + + file_resolver = LocalFileResolver(FileLoader()) + dependency_resolver = DependencyResolver([file_resolver], mock_path_lookup) + maybe = dependency_resolver.resolve_local_file(mock_path_lookup, Path("leaf1.py.txt")) + graph = DependencyGraph(maybe.dependency, None, None, dependency_resolver, mock_path_lookup) + + workflow_task_container = WorkflowTaskContainer(ws, task) + dependency_problems = workflow_task_container.build_dependency_graph(graph) + + assert len(dependency_problems) == 0 From 5e2a0c36afcc14a0f42d0b6a1445e22418fc51be Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 10:29:37 +0200 Subject: [PATCH 13/43] Use mock autospec for class --- tests/unit/source_code/test_jobs.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 66aa31b0df..68fcd433cd 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -1,4 +1,5 @@ from pathlib import Path +from unittest.mock import create_autospec from databricks.sdk import WorkspaceClient from databricks.sdk.service import jobs @@ -10,7 +11,7 @@ def test_workflow_task_container_build_dependency_graph_no_dependency_problems(mock_path_lookup): """No dependency problems""" - ws = WorkspaceClient() + ws = create_autospec(WorkspaceClient) task = jobs.Task(task_key="test") file_resolver = LocalFileResolver(FileLoader()) @@ -22,3 +23,4 @@ def test_workflow_task_container_build_dependency_graph_no_dependency_problems(m dependency_problems = workflow_task_container.build_dependency_graph(graph) assert len(dependency_problems) == 0 + ws.assert_not_called() From 013d8a72d03bb552a7c8062608d2aa44e6056451 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 10:32:14 +0200 Subject: [PATCH 14/43] Avoid disabling pylint --- src/databricks/labs/ucx/source_code/jobs.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index a7c81cfcfc..e253e84d73 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -105,7 +105,8 @@ def _install_library(graph: DependencyGraph, library: compute.Library) -> Iterab # TODO: https://github.com/databrickslabs/ucx/issues/1644 yield DependencyProblem('not-yet-implemented', 'Requirements library is not yet implemented') - def _install_cluster_libraries(self, graph: DependencyGraph): # pylint: disable=unused-argument + def _install_cluster_libraries(self, graph: DependencyGraph): + _ = graph if not self._task.existing_cluster_id: return # TODO: https://github.com/databrickslabs/ucx/issues/1637 From bc36dbe011d13b7a48b768e24ee2d4ce78344916 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:25:03 +0200 Subject: [PATCH 15/43] Add test for attribute not None --- tests/unit/contexts/__init__.py | 0 tests/unit/contexts/test_application.py | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/unit/contexts/__init__.py create mode 100644 tests/unit/contexts/test_application.py diff --git a/tests/unit/contexts/__init__.py b/tests/unit/contexts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py new file mode 100644 index 0000000000..5121c6cfe1 --- /dev/null +++ b/tests/unit/contexts/test_application.py @@ -0,0 +1,8 @@ +from databricks.labs.ucx.contexts.application import GlobalContext + + +def test_global_context_attributes_not_none(): + """Attributes should be not None""" + # Goal is to improve test coverage + ctx = GlobalContext() + assert ctx.pip_installer is not None From 69f7d81934757b84facc788b155d3f2915d56757 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:26:04 +0200 Subject: [PATCH 16/43] Rewrite to pytest.mark.parameterize --- tests/unit/contexts/test_application.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py index 5121c6cfe1..93c1fdab39 100644 --- a/tests/unit/contexts/test_application.py +++ b/tests/unit/contexts/test_application.py @@ -1,8 +1,12 @@ +import pytest + from databricks.labs.ucx.contexts.application import GlobalContext -def test_global_context_attributes_not_none(): +@pytest.mark.parametrize("attribute", ["pip_installer"]) +def test_global_context_attributes_not_none(attribute: str): """Attributes should be not None""" # Goal is to improve test coverage ctx = GlobalContext() - assert ctx.pip_installer is not None + assert hasattr(ctx, attribute) + assert getattr(ctx, attribute) is not None From 8a4caddb9f9b095fbd3d7ed1d89a310306db6f86 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:27:22 +0200 Subject: [PATCH 17/43] Move global context integration tests to unit test --- tests/integration/contexts/__init__.py | 0 tests/integration/contexts/test_global_context.py | 10 ---------- tests/unit/contexts/test_application.py | 2 +- 3 files changed, 1 insertion(+), 11 deletions(-) delete mode 100644 tests/integration/contexts/__init__.py delete mode 100644 tests/integration/contexts/test_global_context.py diff --git a/tests/integration/contexts/__init__.py b/tests/integration/contexts/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/integration/contexts/test_global_context.py b/tests/integration/contexts/test_global_context.py deleted file mode 100644 index ba54c1a442..0000000000 --- a/tests/integration/contexts/test_global_context.py +++ /dev/null @@ -1,10 +0,0 @@ -from databricks.labs.ucx.contexts.application import GlobalContext - - -def test_cached_properties(): - # purpose of the below is merely to improve test coverage - ctx = GlobalContext() - assert ctx.site_packages is not None - assert ctx.site_packages_path is not None - assert ctx.dependency_resolver is not None - assert ctx.pip_installer is not None diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py index 93c1fdab39..57ef7e2f66 100644 --- a/tests/unit/contexts/test_application.py +++ b/tests/unit/contexts/test_application.py @@ -3,7 +3,7 @@ from databricks.labs.ucx.contexts.application import GlobalContext -@pytest.mark.parametrize("attribute", ["pip_installer"]) +@pytest.mark.parametrize("attribute", ["dependency_resolver", "pip_installer", "site_packages", "site_packages_path"]) def test_global_context_attributes_not_none(attribute: str): """Attributes should be not None""" # Goal is to improve test coverage From 562c75898c11d7f3af1a66a5d40383c824bd03a2 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:32:01 +0200 Subject: [PATCH 18/43] Add unit test for graph install library --- tests/unit/source_code/test_graph.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/unit/source_code/test_graph.py diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py new file mode 100644 index 0000000000..b2ca69e284 --- /dev/null +++ b/tests/unit/source_code/test_graph.py @@ -0,0 +1,18 @@ +from pathlib import Path + +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.site_packages import PipInstaller + + +def test_dependency_graph_install_library_pytest(mock_path_lookup): + """Install pytest using pip installer""" + dependency = Dependency(FileLoader(), Path("test")) + installer = PipInstaller() + dependency_resolver = DependencyResolver([], mock_path_lookup) + graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) + + dependency_problems = graph.install_library("pytest") + + assert len(dependency_problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() From 490149b4c764e33239e9c8a4ac96de38063a1c5e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:37:43 +0200 Subject: [PATCH 19/43] Fix reference to install task libraries --- src/databricks/labs/ucx/source_code/jobs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index e253e84d73..8f6652aadc 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -64,12 +64,12 @@ def __init__(self, ws: WorkspaceClient, task: jobs.Task): self._ws = ws def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: - installation_problems = list(self._install_libraries(parent)) + installation_problems = list(self._install_task_libraries(parent)) if len(installation_problems) > 0: return installation_problems return list(self._register_task_dependencies(parent)) - def _install_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: + def _install_task_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: yield from self._install_libraries(graph) yield from self._install_cluster_libraries(graph) From a22a29d7176bb6e49e988f319164884f3e403a24 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:44:31 +0200 Subject: [PATCH 20/43] Test install pypi package in job --- tests/unit/source_code/test_jobs.py | 38 ++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 68fcd433cd..445b825890 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -1,26 +1,46 @@ from pathlib import Path from unittest.mock import create_autospec +import pytest from databricks.sdk import WorkspaceClient -from databricks.sdk.service import jobs +from databricks.sdk.service import compute, jobs -from databricks.labs.ucx.source_code.files import FileLoader, LocalFileResolver -from databricks.labs.ucx.source_code.graph import DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer +from databricks.labs.ucx.source_code.site_packages import PipInstaller -def test_workflow_task_container_build_dependency_graph_no_dependency_problems(mock_path_lookup): - """No dependency problems""" +@pytest.fixture +def graph(mock_path_lookup) -> DependencyGraph: + dependency = Dependency(FileLoader(), Path("test")) + installer = PipInstaller() + dependency_resolver = DependencyResolver([], mock_path_lookup) + graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) + return graph + + +def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_lookup, graph): + """No dependency problems with empty task""" ws = create_autospec(WorkspaceClient) task = jobs.Task(task_key="test") - file_resolver = LocalFileResolver(FileLoader()) - dependency_resolver = DependencyResolver([file_resolver], mock_path_lookup) - maybe = dependency_resolver.resolve_local_file(mock_path_lookup, Path("leaf1.py.txt")) - graph = DependencyGraph(maybe.dependency, None, None, dependency_resolver, mock_path_lookup) + workflow_task_container = WorkflowTaskContainer(ws, task) + dependency_problems = workflow_task_container.build_dependency_graph(graph) + + assert len(dependency_problems) == 0 + ws.assert_not_called() + + +def test_workflow_task_container_build_dependency_graph_pytest_pypi_library(mock_path_lookup, graph): + """Install pytest PyPi library""" + ws = create_autospec(WorkspaceClient) + libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="pytest"))] + task = jobs.Task(task_key="test", libraries=libraries) workflow_task_container = WorkflowTaskContainer(ws, task) dependency_problems = workflow_task_container.build_dependency_graph(graph) assert len(dependency_problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() ws.assert_not_called() From 6f08e06709b70d604680b7803252446e859b90f6 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 11:46:09 +0200 Subject: [PATCH 21/43] Test for installing unknown library --- tests/unit/source_code/test_jobs.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 445b825890..8dd0ac1700 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -26,9 +26,9 @@ def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_loo task = jobs.Task(task_key="test") workflow_task_container = WorkflowTaskContainer(ws, task) - dependency_problems = workflow_task_container.build_dependency_graph(graph) + problems = workflow_task_container.build_dependency_graph(graph) - assert len(dependency_problems) == 0 + assert len(problems) == 0 ws.assert_not_called() @@ -39,8 +39,24 @@ def test_workflow_task_container_build_dependency_graph_pytest_pypi_library(mock task = jobs.Task(task_key="test", libraries=libraries) workflow_task_container = WorkflowTaskContainer(ws, task) - dependency_problems = workflow_task_container.build_dependency_graph(graph) + problems = workflow_task_container.build_dependency_graph(graph) - assert len(dependency_problems) == 0 + assert len(problems) == 0 assert graph.path_lookup.resolve(Path("pytest")).exists() ws.assert_not_called() + + +def test_workflow_task_container_build_dependency_graph_unknown_pypi_library(mock_path_lookup, graph): + """Install unknown PyPi library""" + ws = create_autospec(WorkspaceClient) + libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="unknown-library-name"))] + task = jobs.Task(task_key="test", libraries=libraries) + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install unknown-library-name") + assert mock_path_lookup.resolve(Path("unknown-library-name")) is None + ws.assert_not_called() From 6a8a46fe548f7dbf4a80973c2b47766b5fd4b46b Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 12:20:34 +0200 Subject: [PATCH 22/43] Rewrite Installer similar to Resolver --- src/databricks/labs/ucx/source_code/graph.py | 40 ++++++++++++++++++- src/databricks/labs/ucx/source_code/jobs.py | 10 ++--- .../labs/ucx/source_code/site_packages.py | 8 +++- tests/unit/source_code/test_graph.py | 4 +- tests/unit/source_code/test_jobs.py | 8 ++-- tests/unit/source_code/test_site_packages.py | 4 +- 6 files changed, 59 insertions(+), 15 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 715bce7292..f905fe60ed 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -24,7 +24,7 @@ def __init__( self, dependency: Dependency, parent: DependencyGraph | None, - installer: BaseLibraryInstaller | None, + installer: LibraryInstaller | None, resolver: DependencyResolver, path_lookup: PathLookup, ): @@ -214,9 +214,47 @@ class BaseLibraryInstaller(abc.ABC): they can be registered during dependency graph building. """ + def __init__(self, next_installer: BaseLibraryInstaller | None) -> None: + self._next_installer = next_installer + @abc.abstractmethod + def with_next_installer(self, installer: BaseLibraryInstaller) -> BaseLibraryInstaller: + """Chain next installer.""" + + @property + def next_installer(self): + return self._next_installer + + def install_library_pip(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: + """Install a library with pip and augment path look-up so that it is able to resolve the library.""" + assert self._next_installer is not None + return self._next_installer.install_library_pip(path_lookup, library) + + +class StubInstaller(BaseLibraryInstaller): + """Last installer in the chain""" + + def __init__(self): + super().__init__(None) + + def with_next_installer(self, installer: BaseLibraryInstaller) -> BaseLibraryInstaller: + raise NotImplementedError("Should never happen!") + + +class LibraryInstaller: + def __init__(self, installers: list[BaseLibraryInstaller]): + previous: BaseLibraryInstaller = StubInstaller() + for installer in installers: + installer = installer.with_next_installer(previous) + previous = installer + self._installer: BaseLibraryInstaller = previous + def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: """Install a library and augment path look-up so that it is able to resolve the library.""" + return self._installer.install_library_pip(path_lookup, library) + + def __repr__(self): + return f"" class Dependency(abc.ABC): diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index 8f6652aadc..7ad09decc3 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -15,13 +15,13 @@ from databricks.labs.ucx.source_code.base import CurrentSessionState from databricks.labs.ucx.source_code.files import LocalFile from databricks.labs.ucx.source_code.graph import ( - BaseLibraryInstaller, + Dependency, DependencyGraph, - SourceContainer, DependencyProblem, - Dependency, - WrappingLoader, DependencyResolver, + LibraryInstaller, + SourceContainer, + WrappingLoader, ) from databricks.labs.ucx.source_code.languages import Languages from databricks.labs.ucx.source_code.notebooks.sources import Notebook, NotebookLinter, FileLinter @@ -170,7 +170,7 @@ class WorkflowLinter: def __init__( self, ws: WorkspaceClient, - installer: BaseLibraryInstaller, + installer: LibraryInstaller, resolver: DependencyResolver, path_lookup: PathLookup, migration_index: MigrationIndex, diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 51e4fa4095..3473541b8a 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -17,7 +17,13 @@ class PipInstaller(BaseLibraryInstaller): # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 - def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: + def __init__(self, next_installer: BaseLibraryInstaller | None = None) -> None: + super().__init__(next_installer) + + def with_next_installer(self, installer: BaseLibraryInstaller) -> PipInstaller: + return PipInstaller(self._next_installer) + + def install_library_pip(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: """Pip install library and augment path look-up so that is able to resolve the library""" # invoke pip install via subprocess to install this library into lib_install_folder pip_install_arguments = ["pip", "install", library, "-t", self._temporary_virtual_environment.as_posix()] diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index b2ca69e284..b83ac5a31b 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -1,14 +1,14 @@ from pathlib import Path from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller from databricks.labs.ucx.source_code.site_packages import PipInstaller def test_dependency_graph_install_library_pytest(mock_path_lookup): """Install pytest using pip installer""" dependency = Dependency(FileLoader(), Path("test")) - installer = PipInstaller() + installer = LibraryInstaller([PipInstaller()]) dependency_resolver = DependencyResolver([], mock_path_lookup) graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 8dd0ac1700..b40bc00080 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -6,7 +6,7 @@ from databricks.sdk.service import compute, jobs from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer from databricks.labs.ucx.source_code.site_packages import PipInstaller @@ -14,10 +14,10 @@ @pytest.fixture def graph(mock_path_lookup) -> DependencyGraph: dependency = Dependency(FileLoader(), Path("test")) - installer = PipInstaller() + installer = LibraryInstaller([PipInstaller()]) dependency_resolver = DependencyResolver([], mock_path_lookup) - graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) - return graph + dependency_graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) + return dependency_graph def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_lookup, graph): diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 8e1eb970a9..9cd2b23c51 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -7,7 +7,7 @@ def test_pip_installer_install_library(mock_path_lookup): """Install and resolve pytest""" pip_installer = PipInstaller() - problems = pip_installer.install_library(mock_path_lookup, "pytest") + problems = pip_installer.install_library_pip(mock_path_lookup, "pytest") assert len(problems) == 0 assert mock_path_lookup.resolve(Path("pytest")).exists() @@ -16,7 +16,7 @@ def test_pip_installer_install_library(mock_path_lookup): def test_pip_installer_install_library_unknown_library(mock_path_lookup): """Installing unknown library returns problem""" pip_installer = PipInstaller() - problems = pip_installer.install_library(mock_path_lookup, "unknown-library-name") + problems = pip_installer.install_library_pip(mock_path_lookup, "unknown-library-name") assert len(problems) == 1 assert problems[0].code == "library-install-failed" From 7917ebf79cf286c880d9edfef798a7b796676592 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 12:35:05 +0200 Subject: [PATCH 23/43] Add coverage for not yet implemented install libraries --- src/databricks/labs/ucx/source_code/graph.py | 8 ++++++++ src/databricks/labs/ucx/source_code/jobs.py | 4 +++- tests/unit/source_code/test_jobs.py | 15 +++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index f905fe60ed..4bb0755f28 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -251,6 +251,14 @@ def __init__(self, installers: list[BaseLibraryInstaller]): def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: """Install a library and augment path look-up so that it is able to resolve the library.""" + if library.endswith(".jar"): + return [DependencyProblem("not-yet-implemented", "Jar library is not yet implemented")] + if library.endswith(".egg"): + return [DependencyProblem("not-yet-implemented", "Egg library is not yet implemented")] + if library.endswith(".whl"): + return [DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented")] + if library.endswith(".txt"): + return [DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented")] return self._installer.install_library_pip(path_lookup, library) def __repr__(self): diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index 7ad09decc3..f17c7dd000 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -103,7 +103,9 @@ def _install_library(graph: DependencyGraph, library: compute.Library) -> Iterab if library.requirements: # TODO: download and add every entry via graph.register_library # TODO: https://github.com/databrickslabs/ucx/issues/1644 - yield DependencyProblem('not-yet-implemented', 'Requirements library is not yet implemented') + problems = graph.install_library(library.requirements) + if len(problems) > 0: + yield from problems def _install_cluster_libraries(self, graph: DependencyGraph): _ = graph diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index b40bc00080..69b6840f0b 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -20,6 +20,21 @@ def graph(mock_path_lookup) -> DependencyGraph: return dependency_graph +def test_workflow_task_container_build_dependency_graph_not_yet_implemented(mock_path_lookup, graph): + """Receive not yet implemented problems""" + # Goal of test is to raise test coverage, remove after implementing + ws = create_autospec(WorkspaceClient) + library = compute.Library(jar="library.jar", egg="library.egg", whl="libary.whl", requirements="requirements.txt") + task = jobs.Task(task_key="test", libraries=[library], existing_cluster_id="id") + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 5 + assert all(problem.code == "not-yet-implemented" for problem in problems) + ws.assert_not_called() + + def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_lookup, graph): """No dependency problems with empty task""" ws = create_autospec(WorkspaceClient) From 490677f076525874318a8dcdeabd13ab3bd895c6 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 13:40:22 +0200 Subject: [PATCH 24/43] Test raise NotImplementedError --- tests/unit/source_code/test_graph.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index b83ac5a31b..ce84d0a8ca 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -1,7 +1,9 @@ from pathlib import Path +import pytest + from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller, StubInstaller from databricks.labs.ucx.source_code.site_packages import PipInstaller @@ -16,3 +18,10 @@ def test_dependency_graph_install_library_pytest(mock_path_lookup): assert len(dependency_problems) == 0 assert graph.path_lookup.resolve(Path("pytest")).exists() + + +def test_stub_installer_raises_not_implemented_error(): + """Raise NotImplementedError""" + installer = StubInstaller() + with pytest.raises(NotImplementedError): + installer.with_next_installer(installer) From 70253073ee8331292fb24a4cb2efb0ae8465780e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 13:57:49 +0200 Subject: [PATCH 25/43] Format --- tests/unit/source_code/test_graph.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index ce84d0a8ca..44f6234fc3 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -3,7 +3,13 @@ import pytest from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller, StubInstaller +from databricks.labs.ucx.source_code.graph import ( + Dependency, + DependencyGraph, + DependencyResolver, + LibraryInstaller, + StubInstaller, +) from databricks.labs.ucx.source_code.site_packages import PipInstaller From 08d3cb9a427057d47b4786032da9c9922cc46cdc Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 16:21:17 +0200 Subject: [PATCH 26/43] Merge Installer with Resolver --- src/databricks/labs/ucx/source_code/graph.py | 107 ++++++------------ src/databricks/labs/ucx/source_code/jobs.py | 63 +++++------ .../labs/ucx/source_code/site_packages.py | 23 ++-- tests/unit/contexts/test_application.py | 2 +- tests/unit/source_code/test_graph.py | 30 ++--- tests/unit/source_code/test_jobs.py | 9 +- tests/unit/source_code/test_site_packages.py | 24 ++-- 7 files changed, 95 insertions(+), 163 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 4bb0755f28..a957f5ed67 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -24,13 +24,11 @@ def __init__( self, dependency: Dependency, parent: DependencyGraph | None, - installer: LibraryInstaller | None, resolver: DependencyResolver, path_lookup: PathLookup, ): self._dependency = dependency self._parent = parent - self._installer = installer self._resolver = resolver self._path_lookup = path_lookup.change_directory(dependency.path.parent) self._dependencies: dict[Dependency, DependencyGraph] = {} @@ -47,17 +45,12 @@ def dependency(self): def path(self): return self._dependency.path - def install_library(self, library: str) -> list[DependencyProblem]: - """Install a library and augment path look-up so that it is able to resolve the library.""" - assert self._installer is not None - return self._installer.install_library(self._path_lookup, library) - - def register_library(self, name: str) -> MaybeGraph: - # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies - # TODO: https://github.com/databrickslabs/ucx/issues/1642 - # TODO: https://github.com/databrickslabs/ucx/issues/1643 - # TODO: https://github.com/databrickslabs/ucx/issues/1640 - return MaybeGraph(None, [DependencyProblem('not-yet-implemented', f'Library dependency: {name}')]) + def register_library(self, library: str) -> list[DependencyProblem]: + maybe = self._resolver.resolve_library(self.path_lookup, library) + if not maybe.dependency: + return maybe.problems + maybe_graph = self.register_dependency(maybe.dependency) + return maybe_graph.problems def register_notebook(self, path: Path) -> list[DependencyProblem]: maybe = self._resolver.resolve_notebook(self.path_lookup, path) @@ -84,7 +77,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph: self._dependencies[dependency] = maybe.graph return maybe # nay, create the child graph and populate it - child_graph = DependencyGraph(dependency, self, self._installer, self._resolver, self._path_lookup) + child_graph = DependencyGraph(dependency, self, self._resolver, self._path_lookup) self._dependencies[dependency] = child_graph container = dependency.load(self.path_lookup) if not container: @@ -206,65 +199,6 @@ def __repr__(self): return f"" -class BaseLibraryInstaller(abc.ABC): - """ - A library installer makes libraries available by installing them. - - Installation is a pre-requisite for libraries that are not available on the system (yet). After installation, - they can be registered during dependency graph building. - """ - - def __init__(self, next_installer: BaseLibraryInstaller | None) -> None: - self._next_installer = next_installer - - @abc.abstractmethod - def with_next_installer(self, installer: BaseLibraryInstaller) -> BaseLibraryInstaller: - """Chain next installer.""" - - @property - def next_installer(self): - return self._next_installer - - def install_library_pip(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: - """Install a library with pip and augment path look-up so that it is able to resolve the library.""" - assert self._next_installer is not None - return self._next_installer.install_library_pip(path_lookup, library) - - -class StubInstaller(BaseLibraryInstaller): - """Last installer in the chain""" - - def __init__(self): - super().__init__(None) - - def with_next_installer(self, installer: BaseLibraryInstaller) -> BaseLibraryInstaller: - raise NotImplementedError("Should never happen!") - - -class LibraryInstaller: - def __init__(self, installers: list[BaseLibraryInstaller]): - previous: BaseLibraryInstaller = StubInstaller() - for installer in installers: - installer = installer.with_next_installer(previous) - previous = installer - self._installer: BaseLibraryInstaller = previous - - def install_library(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: - """Install a library and augment path look-up so that it is able to resolve the library.""" - if library.endswith(".jar"): - return [DependencyProblem("not-yet-implemented", "Jar library is not yet implemented")] - if library.endswith(".egg"): - return [DependencyProblem("not-yet-implemented", "Egg library is not yet implemented")] - if library.endswith(".whl"): - return [DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented")] - if library.endswith(".txt"): - return [DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented")] - return self._installer.install_library_pip(path_lookup, library) - - def __repr__(self): - return f"" - - class Dependency(abc.ABC): def __init__(self, loader: DependencyLoader, path: Path): @@ -342,6 +276,11 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: assert self._next_resolver is not None return self._next_resolver.resolve_import(path_lookup, name) + def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + # TODO: remove StubResolver and return MaybeDependency(None, [...]) + assert self._next_resolver is not None + return self._next_resolver.resolve_library_pip(path_lookup, name) + class BaseFileResolver(abc.ABC): @@ -361,6 +300,9 @@ def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._fail('import-not-found', f"Could not locate import: {name}") + def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + return self._fail('library-not-found', f"Could not resolve library: {name}") + @staticmethod def _fail(code: str, message: str): return MaybeDependency(None, [DependencyProblem(code, message)]) @@ -404,6 +346,21 @@ def resolve_notebook(self, path_lookup: PathLookup, path: Path) -> MaybeDependen def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._import_resolver.resolve_import(path_lookup, name) + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + if library.endswith(".jar"): + problem = DependencyProblem("not-yet-implemented", "Jar library is not yet implemented") + return MaybeDependency(None, [problem]) + if library.endswith(".egg"): + problem = DependencyProblem("not-yet-implemented", "Egg library is not yet implemented") + return MaybeDependency(None, [problem]) + if library.endswith(".whl"): + problem = DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented") + return MaybeDependency(None, [problem]) + if library.endswith(".txt"): + problem = DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented") + return MaybeDependency(None, [problem]) + return self._resolver.resolve_library_pip(path_lookup, library) + def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. In case of problems, the paths in the problems will be relative to the starting path lookup.""" @@ -414,7 +371,7 @@ def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: maybe = resolver.resolve_local_file(self._path_lookup, path) if not maybe.dependency: return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) - graph = DependencyGraph(maybe.dependency, None, None, self, self._path_lookup) + graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) container = maybe.dependency.load(graph.path_lookup) if container is None: problem = DependencyProblem('cannot-load-file', f"Could not load file {path}") @@ -439,7 +396,7 @@ def build_notebook_dependency_graph(self, path: Path) -> MaybeGraph: maybe = self._notebook_resolver.resolve_notebook(self._path_lookup, path) if not maybe.dependency: return MaybeGraph(None, self._make_relative_paths(maybe.problems, path)) - graph = DependencyGraph(maybe.dependency, None, None, self, self._path_lookup) + graph = DependencyGraph(maybe.dependency, None, self, self._path_lookup) container = maybe.dependency.load(graph.path_lookup) if container is None: problem = DependencyProblem('cannot-load-notebook', f"Could not load notebook {path}") diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index f17c7dd000..d7ebc274f9 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -19,7 +19,6 @@ DependencyGraph, DependencyProblem, DependencyResolver, - LibraryInstaller, SourceContainer, WrappingLoader, ) @@ -64,66 +63,55 @@ def __init__(self, ws: WorkspaceClient, task: jobs.Task): self._ws = ws def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]: - installation_problems = list(self._install_task_libraries(parent)) - if len(installation_problems) > 0: - return installation_problems return list(self._register_task_dependencies(parent)) - def _install_task_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: - yield from self._install_libraries(graph) - yield from self._install_cluster_libraries(graph) + def _register_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: + yield from self._register_libraries(graph) + yield from self._register_notebook(graph) + yield from self._register_spark_python_task(graph) + yield from self._register_python_wheel_task(graph) + yield from self._register_spark_jar_task(graph) + yield from self._register_run_job_task(graph) + yield from self._register_pipeline_task(graph) + yield from self._register_existing_cluster_id(graph) + yield from self._register_spark_submit_task(graph) - def _install_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: + def _register_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.libraries: return for library in self._task.libraries: - yield from self._install_library(graph, library) + yield from self._register_library(graph, library) @staticmethod - def _install_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: + def _register_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: if library.pypi: # TODO: https://github.com/databrickslabs/ucx/issues/1642 - problems = graph.install_library(library.pypi.package) + problems = graph.register_library(library.pypi.package) if len(problems) > 0: yield from problems if library.jar: # TODO: https://github.com/databrickslabs/ucx/issues/1641 - yield DependencyProblem('not-yet-implemented', 'Jar library is not yet implemented') + problems = graph.register_library(library.jar) + if len(problems) > 0: + yield from problems if library.egg: # TODO: https://github.com/databrickslabs/ucx/issues/1643 - problems = graph.install_library(library.egg) + problems = graph.register_library(library.egg) if len(problems) > 0: yield from problems if library.whl: # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path # TODO: https://github.com/databrickslabs/ucx/issues/1640 - problems = graph.install_library(library.whl) + problems = graph.register_library(library.whl) if len(problems) > 0: yield from problems if library.requirements: # TODO: download and add every entry via graph.register_library # TODO: https://github.com/databrickslabs/ucx/issues/1644 - problems = graph.install_library(library.requirements) + problems = graph.register_library(library.requirements) if len(problems) > 0: yield from problems - def _install_cluster_libraries(self, graph: DependencyGraph): - _ = graph - if not self._task.existing_cluster_id: - return - # TODO: https://github.com/databrickslabs/ucx/issues/1637 - # load libraries installed on the referred cluster - yield DependencyProblem('not-yet-implemented', 'Existing cluster id is not yet implemented') - - def _register_task_dependencies(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: - yield from self._register_notebook(graph) - yield from self._register_spark_python_task(graph) - yield from self._register_python_wheel_task(graph) - yield from self._register_spark_jar_task(graph) - yield from self._register_run_job_task(graph) - yield from self._register_pipeline_task(graph) - yield from self._register_spark_submit_task(graph) - def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.notebook_task: return [] @@ -162,6 +150,13 @@ def _register_pipeline_task(self, graph: DependencyGraph): # pylint: disable=un # TODO: https://github.com/databrickslabs/ucx/issues/1638 yield DependencyProblem('not-yet-implemented', 'Pipeline task is not yet implemented') + def _register_existing_cluster_id(self, graph: DependencyGraph): # pylint: disable=unused-argument + if not self._task.existing_cluster_id: + return + # TODO: https://github.com/databrickslabs/ucx/issues/1637 + # load libraries installed on the referred cluster + yield DependencyProblem('not-yet-implemented', 'Existing cluster id is not yet implemented') + def _register_spark_submit_task(self, graph: DependencyGraph): # pylint: disable=unused-argument if not self._task.spark_submit_task: return @@ -172,13 +167,11 @@ class WorkflowLinter: def __init__( self, ws: WorkspaceClient, - installer: LibraryInstaller, resolver: DependencyResolver, path_lookup: PathLookup, migration_index: MigrationIndex, ): self._ws = ws - self._installer = installer self._resolver = resolver self._path_lookup = path_lookup self._migration_index = migration_index @@ -228,7 +221,7 @@ def _lint_job(self, job: jobs.Job) -> list[JobProblem]: def _lint_task(self, task: jobs.Task, job: jobs.Job): dependency: Dependency = WorkflowTask(self._ws, task, job) - graph = DependencyGraph(dependency, None, self._installer, self._resolver, self._path_lookup) + graph = DependencyGraph(dependency, None, self._resolver, self._path_lookup) container = dependency.load(self._path_lookup) assert container is not None # because we just created it problems = container.build_dependency_graph(graph) diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 3473541b8a..211cd90f90 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -7,33 +7,34 @@ from pathlib import Path from subprocess import CalledProcessError -from databricks.labs.ucx.source_code.graph import BaseLibraryInstaller, DependencyProblem +from databricks.labs.ucx.source_code.graph import BaseDependencyResolver, DependencyProblem, MaybeDependency from databricks.labs.ucx.source_code.path_lookup import PathLookup -class PipInstaller(BaseLibraryInstaller): +class PipResolver(BaseDependencyResolver): # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies # TODO: https://github.com/databrickslabs/ucx/issues/1642 # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 - def __init__(self, next_installer: BaseLibraryInstaller | None = None) -> None: - super().__init__(next_installer) + def __init__(self, next_resolver: BaseDependencyResolver | None = None) -> None: + super().__init__(next_resolver) - def with_next_installer(self, installer: BaseLibraryInstaller) -> PipInstaller: - return PipInstaller(self._next_installer) + def with_next_resolver(self, resolver: BaseDependencyResolver) -> PipResolver: + return PipResolver(resolver) - def install_library_pip(self, path_lookup: PathLookup, library: str) -> list[DependencyProblem]: - """Pip install library and augment path look-up so that is able to resolve the library""" + def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + """Pip install library and augment path look-up to resolve the library at import""" # invoke pip install via subprocess to install this library into lib_install_folder - pip_install_arguments = ["pip", "install", library, "-t", self._temporary_virtual_environment.as_posix()] + pip_install_arguments = ["pip", "install", name, "-t", self._temporary_virtual_environment.as_posix()] try: subprocess.run(pip_install_arguments, check=True) except CalledProcessError as e: - return [DependencyProblem("library-install-failed", f"Failed to install {library}: {e}")] + problem = DependencyProblem("library-install-failed", f"Failed to install {name}: {e}") + return MaybeDependency(None, [problem]) path_lookup.append_path(self._temporary_virtual_environment) - return [] + return MaybeDependency(None, []) @cached_property def _temporary_virtual_environment(self) -> Path: diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py index 57ef7e2f66..373b631374 100644 --- a/tests/unit/contexts/test_application.py +++ b/tests/unit/contexts/test_application.py @@ -3,7 +3,7 @@ from databricks.labs.ucx.contexts.application import GlobalContext -@pytest.mark.parametrize("attribute", ["dependency_resolver", "pip_installer", "site_packages", "site_packages_path"]) +@pytest.mark.parametrize("attribute", ["dependency_resolver", "pip_resolver", "site_packages", "site_packages_path"]) def test_global_context_attributes_not_none(attribute: str): """Attributes should be not None""" # Goal is to improve test coverage diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 44f6234fc3..27c135de6e 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -1,33 +1,17 @@ from pathlib import Path -import pytest - from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import ( - Dependency, - DependencyGraph, - DependencyResolver, - LibraryInstaller, - StubInstaller, -) -from databricks.labs.ucx.source_code.site_packages import PipInstaller +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.site_packages import PipResolver -def test_dependency_graph_install_library_pytest(mock_path_lookup): +def test_dependency_graph_register_library(mock_path_lookup): """Install pytest using pip installer""" dependency = Dependency(FileLoader(), Path("test")) - installer = LibraryInstaller([PipInstaller()]) - dependency_resolver = DependencyResolver([], mock_path_lookup) - graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) - dependency_problems = graph.install_library("pytest") + problems = graph.register_library("pytest") - assert len(dependency_problems) == 0 + assert len(problems) == 0 assert graph.path_lookup.resolve(Path("pytest")).exists() - - -def test_stub_installer_raises_not_implemented_error(): - """Raise NotImplementedError""" - installer = StubInstaller() - with pytest.raises(NotImplementedError): - installer.with_next_installer(installer) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 69b6840f0b..2dfa5d8c0e 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -6,17 +6,16 @@ from databricks.sdk.service import compute, jobs from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, LibraryInstaller +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer -from databricks.labs.ucx.source_code.site_packages import PipInstaller +from databricks.labs.ucx.source_code.site_packages import PipResolver @pytest.fixture def graph(mock_path_lookup) -> DependencyGraph: dependency = Dependency(FileLoader(), Path("test")) - installer = LibraryInstaller([PipInstaller()]) - dependency_resolver = DependencyResolver([], mock_path_lookup) - dependency_graph = DependencyGraph(dependency, None, installer, dependency_resolver, mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], mock_path_lookup) + dependency_graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) return dependency_graph diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 9cd2b23c51..ef7b734bab 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -1,26 +1,24 @@ from pathlib import Path -from databricks.labs.ucx.source_code.site_packages import PipInstaller, SitePackages +from databricks.labs.ucx.source_code.site_packages import PipResolver, SitePackages from tests.unit import locate_site_packages -def test_pip_installer_install_library(mock_path_lookup): - """Install and resolve pytest""" - pip_installer = PipInstaller() - problems = pip_installer.install_library_pip(mock_path_lookup, "pytest") +def test_pip_resolver_library(mock_path_lookup): + pip_installer = PipResolver() + maybe = pip_installer.resolve_library_pip(mock_path_lookup, "pytest") - assert len(problems) == 0 + assert len(maybe.problems) == 0 assert mock_path_lookup.resolve(Path("pytest")).exists() -def test_pip_installer_install_library_unknown_library(mock_path_lookup): - """Installing unknown library returns problem""" - pip_installer = PipInstaller() - problems = pip_installer.install_library_pip(mock_path_lookup, "unknown-library-name") +def test_pip_resolver_unknown_library(mock_path_lookup): + pip_installer = PipResolver() + maybe = pip_installer.resolve_library_pip(mock_path_lookup, "unknown-library-name") - assert len(problems) == 1 - assert problems[0].code == "library-install-failed" - assert problems[0].message.startswith("Failed to install unknown-library-name") + assert len(maybe.problems) == 1 + assert maybe.problems[0].code == "library-install-failed" + assert maybe.problems[0].message.startswith("Failed to install unknown-library-name") assert mock_path_lookup.resolve(Path("unknown-library-name")) is None From 9cc9218cb4485701f8856aada2bfebd9525defe0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 16:46:58 +0200 Subject: [PATCH 27/43] Fix merge issues --- src/databricks/labs/ucx/contexts/application.py | 9 ++++----- src/databricks/labs/ucx/source_code/graph.py | 2 +- src/databricks/labs/ucx/source_code/site_packages.py | 8 ++++---- tests/unit/source_code/test_graph.py | 3 ++- tests/unit/source_code/test_jobs.py | 6 +++++- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index c6b2648cb7..6d69a9d16d 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -43,7 +43,7 @@ from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.graph import DependencyResolver from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist -from databricks.labs.ucx.source_code.site_packages import PipInstaller, SitePackages +from databricks.labs.ucx.source_code.site_packages import PipResolver, SitePackages from databricks.labs.ucx.source_code.languages import Languages from databricks.labs.ucx.source_code.redash import Redash from databricks.labs.ucx.workspace_access import generic, redash @@ -351,8 +351,8 @@ def verify_has_metastore(self): return VerifyHasMetastore(self.workspace_client) @cached_property - def pip_installer(self): - return PipInstaller() + def pip_resolver(self): + return PipResolver() @cached_property def notebook_loader(self) -> NotebookLoader: @@ -395,14 +395,13 @@ def file_resolver(self): @cached_property def dependency_resolver(self): - import_resolvers = [self.file_resolver, self.whitelist_resolver] + import_resolvers = [self.pip_resolver, self.file_resolver, self.whitelist_resolver] return DependencyResolver(self.notebook_resolver, import_resolvers, self.path_lookup) @cached_property def workflow_linter(self): return WorkflowLinter( self.workspace_client, - self.pip_installer, self.dependency_resolver, self.path_lookup, MigrationIndex([]), # TODO: bring back self.tables_migrator.index() diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index a957f5ed67..798733c60f 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -359,7 +359,7 @@ def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDepende if library.endswith(".txt"): problem = DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented") return MaybeDependency(None, [problem]) - return self._resolver.resolve_library_pip(path_lookup, library) + return self._import_resolver.resolve_library_pip(path_lookup, library) def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 211cd90f90..0d0faacf4d 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -7,20 +7,20 @@ from pathlib import Path from subprocess import CalledProcessError -from databricks.labs.ucx.source_code.graph import BaseDependencyResolver, DependencyProblem, MaybeDependency +from databricks.labs.ucx.source_code.graph import BaseImportResolver, DependencyProblem, MaybeDependency from databricks.labs.ucx.source_code.path_lookup import PathLookup -class PipResolver(BaseDependencyResolver): +class PipResolver(BaseImportResolver): # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies # TODO: https://github.com/databrickslabs/ucx/issues/1642 # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 - def __init__(self, next_resolver: BaseDependencyResolver | None = None) -> None: + def __init__(self, next_resolver: BaseImportResolver | None = None) -> None: super().__init__(next_resolver) - def with_next_resolver(self, resolver: BaseDependencyResolver) -> PipResolver: + def with_next_resolver(self, resolver: BaseImportResolver) -> PipResolver: return PipResolver(resolver) def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 27c135de6e..8287de44b9 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -2,13 +2,14 @@ from databricks.labs.ucx.source_code.files import FileLoader from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.site_packages import PipResolver def test_dependency_graph_register_library(mock_path_lookup): """Install pytest using pip installer""" dependency = Dependency(FileLoader(), Path("test")) - dependency_resolver = DependencyResolver([PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver(NotebookResolver(NotebookLoader()), [PipResolver()], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) problems = graph.register_library("pytest") diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 2dfa5d8c0e..7f25035b5e 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -8,13 +8,17 @@ from databricks.labs.ucx.source_code.files import FileLoader from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer +from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.site_packages import PipResolver @pytest.fixture def graph(mock_path_lookup) -> DependencyGraph: dependency = Dependency(FileLoader(), Path("test")) - dependency_resolver = DependencyResolver([PipResolver()], mock_path_lookup) + notebook_loader = NotebookLoader() + notebook_resolver = NotebookResolver(notebook_loader) + import_resolver = PipResolver() + dependency_resolver = DependencyResolver(notebook_resolver, [import_resolver], mock_path_lookup) dependency_graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) return dependency_graph From 4f3afe6b3ce45ec13e94d9c353d46a4dba53df13 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 17:05:45 +0200 Subject: [PATCH 28/43] Avoid complexity warning --- src/databricks/labs/ucx/source_code/jobs.py | 36 +++++++-------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index d7ebc274f9..cb486abdc3 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -84,31 +84,17 @@ def _register_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProb @staticmethod def _register_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: - if library.pypi: - # TODO: https://github.com/databrickslabs/ucx/issues/1642 - problems = graph.register_library(library.pypi.package) - if len(problems) > 0: - yield from problems - if library.jar: - # TODO: https://github.com/databrickslabs/ucx/issues/1641 - problems = graph.register_library(library.jar) - if len(problems) > 0: - yield from problems - if library.egg: - # TODO: https://github.com/databrickslabs/ucx/issues/1643 - problems = graph.register_library(library.egg) - if len(problems) > 0: - yield from problems - if library.whl: - # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path - # TODO: https://github.com/databrickslabs/ucx/issues/1640 - problems = graph.register_library(library.whl) - if len(problems) > 0: - yield from problems - if library.requirements: - # TODO: download and add every entry via graph.register_library - # TODO: https://github.com/databrickslabs/ucx/issues/1644 - problems = graph.register_library(library.requirements) + libraries = ( + library.pypi.package if library.pypi else None, # TODO: https://github.com/databrickslabs/ucx/issues/1642 + library.jar, # TODO: https://github.com/databrickslabs/ucx/issues/1641 + library.egg, # TODO: https://github.com/databrickslabs/ucx/issues/1643 + library.whl, # TODO: https://github.com/databrickslabs/ucx/issues/1640 + library.requirements, # TODO: https://github.com/databrickslabs/ucx/issues/1644 + ) + for lib in libraries: + if lib is None: + continue + problems = graph.register_library(lib) if len(problems) > 0: yield from problems From c8fc07e000eff6b6f20a53066c1e77df1ffaf8a5 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 17:09:23 +0200 Subject: [PATCH 29/43] Add test to increase coverage --- tests/unit/source_code/test_graph.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 8287de44b9..2dc9750ca6 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -1,7 +1,7 @@ from pathlib import Path from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, StubImportResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.site_packages import PipResolver @@ -16,3 +16,11 @@ def test_dependency_graph_register_library(mock_path_lookup): assert len(problems) == 0 assert graph.path_lookup.resolve(Path("pytest")).exists() + + +def test_stub_import_resolver_fails_with_library_not_found_dependency_problem(mock_path_lookup): + resolver = StubImportResolver() + maybe = resolver.resolve_library_pip(mock_path_lookup, "test") + + assert len(maybe.problems) == 1 + assert maybe.problems[0].code == "library-not-found" From 68ef91fc87ac354c649cc64fdbe441c677724b7f Mon Sep 17 00:00:00 2001 From: Cor Date: Wed, 15 May 2024 18:02:45 +0200 Subject: [PATCH 30/43] Add pip cell installs to dependency graph (#1698) ## Changes Logic for registering import installed in a pip cell ### Linked issues #1642 Follow up on #1692 ### Functionality - [ ] added relevant user documentation - [ ] added new CLI command - [ ] modified existing command: `databricks labs ucx ...` - [ ] added a new workflow - [ ] modified existing workflow: `...` - [ ] added a new table - [ ] modified existing table: `...` ### Tests - [ ] manually tested - [ ] added unit tests - [ ] added integration tests - [ ] verified on staging environment (screenshot attached) --- src/databricks/labs/ucx/source_code/graph.py | 1 + .../labs/ucx/source_code/notebooks/cells.py | 13 ++- tests/unit/source_code/notebooks/__init__.py | 0 .../unit/source_code/notebooks/test_cells.py | 88 +++++++++++++++++++ 4 files changed, 100 insertions(+), 2 deletions(-) create mode 100644 tests/unit/source_code/notebooks/__init__.py create mode 100644 tests/unit/source_code/notebooks/test_cells.py diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 798733c60f..b6512f0ca3 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -80,6 +80,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph: child_graph = DependencyGraph(dependency, self, self._resolver, self._path_lookup) self._dependencies[dependency] = child_graph container = dependency.load(self.path_lookup) + # TODO: Return either (child) graph OR problems if not container: problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path) return MaybeGraph(child_graph, [problem]) diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py index bdba2a4047..55ab649ad6 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/cells.py +++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py @@ -182,9 +182,18 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, _: DependencyGraph) -> list[DependencyProblem]: + def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProblem]: # TODO: https://github.com/databrickslabs/ucx/issues/1642 - return [] + # TODO: this is very basic code, we need to improve it + splits = self.original_code.split(' ') + if len(splits) < 3: + return [DependencyProblem("library-install-failed", f"Missing arguments in '{self.original_code}'")] + if splits[1] != "install": + return [DependencyProblem("library-install-failed", f"Unsupported %pip command: {splits[1]}")] + # TODO: we need to support different formats of the library name and etc + library = splits[2] + + return graph.register_library(library) class CellLanguage(Enum): diff --git a/tests/unit/source_code/notebooks/__init__.py b/tests/unit/source_code/notebooks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py new file mode 100644 index 0000000000..325b499d56 --- /dev/null +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -0,0 +1,88 @@ +from pathlib import Path +from unittest.mock import create_autospec + +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell +from databricks.labs.ucx.source_code.notebooks.loaders import ( + NotebookResolver, + NotebookLoader, +) +from databricks.labs.ucx.source_code.site_packages import PipResolver + + +def test_pip_cell_language_is_pip(): + assert PipCell("code").language == CellLanguage.PIP + + +def test_pip_cell_build_dependency_graph_invokes_register_library(): + graph = create_autospec(DependencyGraph) + + code = "%pip install databricks" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 0 + graph.register_library.assert_called_once_with("databricks") + + +def test_pip_cell_build_dependency_graph_pip_registers_missing_library(): + graph = create_autospec(DependencyGraph) + + code = "%pip install" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message == "Missing arguments in '%pip install'" + graph.register_library.assert_not_called() + + +def test_pip_cell_build_dependency_graph_reports_incorrect_syntax(): + graph = create_autospec(DependencyGraph) + + code = "%pip installl pytest" # typo on purpose + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message == "Unsupported %pip command: installl" + graph.register_library.assert_not_called() + + +def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_lookup): + dependency = Dependency(FileLoader(), Path("test")) + notebook_loader = NotebookLoader() + notebook_resolver = NotebookResolver(notebook_loader) + dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + + code = "%pip install unknown-library-name" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install unknown-library-name") + + +def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lookup): + dependency = Dependency(FileLoader(), Path("test")) + notebook_loader = NotebookLoader() + notebook_resolver = NotebookResolver(notebook_loader) + dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + + code = "%pip install pytest" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() From 5416af0e17fad04566c2829ed2e3ea875eb3469c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Wed, 15 May 2024 18:04:17 +0200 Subject: [PATCH 31/43] Rename resolve_library_pip to resolve_library --- src/databricks/labs/ucx/source_code/graph.py | 8 ++++---- src/databricks/labs/ucx/source_code/site_packages.py | 2 +- tests/unit/source_code/test_graph.py | 2 +- tests/unit/source_code/test_site_packages.py | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index b6512f0ca3..ed9476ddb2 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -277,10 +277,10 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: assert self._next_resolver is not None return self._next_resolver.resolve_import(path_lookup, name) - def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: # TODO: remove StubResolver and return MaybeDependency(None, [...]) assert self._next_resolver is not None - return self._next_resolver.resolve_library_pip(path_lookup, name) + return self._next_resolver.resolve_library(path_lookup, name) class BaseFileResolver(abc.ABC): @@ -301,7 +301,7 @@ def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._fail('import-not-found', f"Could not locate import: {name}") - def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._fail('library-not-found', f"Could not resolve library: {name}") @staticmethod @@ -360,7 +360,7 @@ def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDepende if library.endswith(".txt"): problem = DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented") return MaybeDependency(None, [problem]) - return self._import_resolver.resolve_library_pip(path_lookup, library) + return self._import_resolver.resolve_library(path_lookup, library) def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 0d0faacf4d..5a7210c785 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -23,7 +23,7 @@ def __init__(self, next_resolver: BaseImportResolver | None = None) -> None: def with_next_resolver(self, resolver: BaseImportResolver) -> PipResolver: return PipResolver(resolver) - def resolve_library_pip(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: """Pip install library and augment path look-up to resolve the library at import""" # invoke pip install via subprocess to install this library into lib_install_folder pip_install_arguments = ["pip", "install", name, "-t", self._temporary_virtual_environment.as_posix()] diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 2dc9750ca6..0538b5aac0 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -20,7 +20,7 @@ def test_dependency_graph_register_library(mock_path_lookup): def test_stub_import_resolver_fails_with_library_not_found_dependency_problem(mock_path_lookup): resolver = StubImportResolver() - maybe = resolver.resolve_library_pip(mock_path_lookup, "test") + maybe = resolver.resolve_library(mock_path_lookup, "test") assert len(maybe.problems) == 1 assert maybe.problems[0].code == "library-not-found" diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index ef7b734bab..66b2fcd679 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -6,7 +6,7 @@ def test_pip_resolver_library(mock_path_lookup): pip_installer = PipResolver() - maybe = pip_installer.resolve_library_pip(mock_path_lookup, "pytest") + maybe = pip_installer.resolve_library(mock_path_lookup, "pytest") assert len(maybe.problems) == 0 assert mock_path_lookup.resolve(Path("pytest")).exists() @@ -14,7 +14,7 @@ def test_pip_resolver_library(mock_path_lookup): def test_pip_resolver_unknown_library(mock_path_lookup): pip_installer = PipResolver() - maybe = pip_installer.resolve_library_pip(mock_path_lookup, "unknown-library-name") + maybe = pip_installer.resolve_library(mock_path_lookup, "unknown-library-name") assert len(maybe.problems) == 1 assert maybe.problems[0].code == "library-install-failed" From 4e16abb2d63b19eaafd1712a366b8692a74dcd8c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:23:57 +0200 Subject: [PATCH 32/43] Add library resolvers to DependencyResolvers --- .../labs/ucx/contexts/application.py | 2 +- src/databricks/labs/ucx/source_code/graph.py | 2 + .../unit/source_code/notebooks/test_cells.py | 4 +- tests/unit/source_code/test_dependencies.py | 42 +++++++++---------- tests/unit/source_code/test_graph.py | 2 +- tests/unit/source_code/test_jobs.py | 2 +- tests/unit/source_code/test_notebook.py | 12 +++--- .../test_path_lookup_simulation.py | 8 ++-- tests/unit/source_code/test_s3fs.py | 4 +- 9 files changed, 40 insertions(+), 38 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 6d69a9d16d..0bd7b4c81e 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -396,7 +396,7 @@ def file_resolver(self): @cached_property def dependency_resolver(self): import_resolvers = [self.pip_resolver, self.file_resolver, self.whitelist_resolver] - return DependencyResolver(self.notebook_resolver, import_resolvers, self.path_lookup) + return DependencyResolver([], self.notebook_resolver, import_resolvers, self.path_lookup) @cached_property def workflow_linter(self): diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index ed9476ddb2..b7b8cd607f 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -325,10 +325,12 @@ class DependencyResolver: def __init__( self, + library_resolvers, notebook_resolver: BaseNotebookResolver, import_resolvers: list[BaseImportResolver], path_lookup: PathLookup, ): + self._library_resolvers = library_resolvers self._notebook_resolver = notebook_resolver self._import_resolver = self._chain_import_resolvers(import_resolvers) self._path_lookup = path_lookup diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py index 325b499d56..2c7b471860 100644 --- a/tests/unit/source_code/notebooks/test_cells.py +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -59,7 +59,7 @@ def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_looku dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [PipResolver()], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) code = "%pip install unknown-library-name" @@ -76,7 +76,7 @@ def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lo dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [PipResolver()], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) code = "%pip install pytest" diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index 55d89a43aa..399e162f86 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -23,7 +23,7 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root3.run.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"} @@ -32,7 +32,7 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(mock_pa def test_dependency_graph_builder_visits_local_notebook_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root4.py.txt", "leaf3.py.txt"} @@ -47,7 +47,7 @@ def test_dependency_graph_builder_visits_workspace_file_dependencies(mock_path_l WhitelistResolver(whi), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path('./root8.py')) assert not maybe.failed assert maybe.graph.all_relative_names() == {'leaf1.py.txt', 'leaf2.py.txt', 'root8.py.txt'} @@ -62,7 +62,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook WhitelistResolver(whi), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py")) assert list(maybe.problems) == [ DependencyProblem( @@ -80,7 +80,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dependency(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py")) assert list(maybe.problems) == [ DependencyProblem( @@ -92,7 +92,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dep def test_dependency_graph_builder_raises_problem_with_non_constant_local_notebook_dependency(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path('root10.py.txt')) assert list(maybe.problems) == [ DependencyProblem( @@ -110,7 +110,7 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_local_noteboo def test_dependency_graph_builder_raises_problem_with_invalid_run_cell(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path('leaf6.py.txt')) assert list(maybe.problems) == [ DependencyProblem('invalid-run-cell', 'Missing notebook path in %run command', Path('leaf6.py.txt'), 5, 0, 5, 4) @@ -121,7 +121,7 @@ def test_dependency_graph_builder_visits_recursive_file_dependencies(mock_path_l notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("root6.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root6.py.txt", "root5.py.txt", "leaf4.py.txt"} @@ -131,7 +131,7 @@ def test_dependency_graph_builder_raises_problem_with_unresolved_import(mock_pat notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path('root7.py.txt')) assert list(maybe.problems) == [ DependencyProblem( @@ -144,7 +144,7 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_notebook_argu notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("run_notebooks.py.txt")) assert list(maybe.problems) == [ DependencyProblem( @@ -163,7 +163,7 @@ def test_dependency_graph_builder_visits_file_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("root5.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root5.py.txt", "leaf4.py.txt"} @@ -173,7 +173,7 @@ def test_dependency_graph_builder_skips_builtin_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert not maybe.failed graph = maybe.graph @@ -187,7 +187,7 @@ def test_dependency_graph_builder_ignores_known_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert maybe.graph graph = maybe.graph @@ -204,7 +204,7 @@ def test_dependency_graph_builder_visits_site_packages(empty_index, mock_noteboo WhitelistResolver(Whitelist()), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-site-package.py.txt")) assert not maybe.failed graph = maybe.graph @@ -228,7 +228,7 @@ def test_dependency_graph_builder_resolves_sub_site_package(): LocalFileResolver(file_loader), WhitelistResolver(whitelist), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert maybe.graph maybe = maybe.graph.locate_dependency(Path(site_packages_path, "databricks", "labs", "lsql", "core.py")) @@ -237,7 +237,7 @@ def test_dependency_graph_builder_resolves_sub_site_package(): def test_dependency_graph_builder_raises_problem_with_unfound_root_file(mock_path_lookup, mock_notebook_resolver): import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py.txt")) assert list(maybe.problems) == [ DependencyProblem('file-not-found', 'File not found: non-existing.py.txt', Path("non-existing.py.txt")) @@ -247,7 +247,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_root_file(mock_pat def test_dependency_graph_builder_raises_problem_with_unfound_root_notebook(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("unknown_notebook")) assert list(maybe.problems) == [ DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook', Path("unknown_notebook")) @@ -262,7 +262,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So file_loader = FailingFileLoader() import_resolvers = [LocalFileResolver(file_loader)] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert list(maybe.problems) == [ DependencyProblem( @@ -279,7 +279,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So notebook_loader = FailingNotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root5.py.txt")) assert list(maybe.problems) == [ DependencyProblem('cannot-load-notebook', 'Could not load notebook root5.py.txt', Path('')) @@ -287,7 +287,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So def test_dependency_graph_builder_raises_problem_with_missing_file_loader(mock_notebook_resolver, mock_path_lookup): - dependency_resolver = DependencyResolver(mock_notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert list(maybe.problems) == [ DependencyProblem('missing-file-resolver', 'Missing resolver for local files', Path('')) @@ -296,5 +296,5 @@ def test_dependency_graph_builder_raises_problem_with_missing_file_loader(mock_n def test_dependency_graph_builder_repr(mock_notebook_resolver, mock_path_lookup): # improve test coverage - dependency_resolver = DependencyResolver(mock_notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, [], mock_path_lookup) assert len(repr(dependency_resolver)) > 0 diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 0538b5aac0..33cb886490 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -9,7 +9,7 @@ def test_dependency_graph_register_library(mock_path_lookup): """Install pytest using pip installer""" dependency = Dependency(FileLoader(), Path("test")) - dependency_resolver = DependencyResolver(NotebookResolver(NotebookLoader()), [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([], NotebookResolver(NotebookLoader()), [PipResolver()], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) problems = graph.register_library("pytest") diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 7f25035b5e..ca694dcc43 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -18,7 +18,7 @@ def graph(mock_path_lookup) -> DependencyGraph: notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolver = PipResolver() - dependency_resolver = DependencyResolver(notebook_resolver, [import_resolver], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [import_resolver], mock_path_lookup) dependency_graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) return dependency_graph diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index 4c49354335..d4ecebc99f 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -128,7 +128,7 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str def test_notebook_builds_leaf_dependency_graph(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path("leaf1.py.txt")) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -146,7 +146,7 @@ def test_notebook_builds_depth1_dependency_graph(mock_path_lookup): paths = ["root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -159,7 +159,7 @@ def test_notebook_builds_depth2_dependency_graph(mock_path_lookup): paths = ["root2.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -172,7 +172,7 @@ def test_notebook_builds_dependency_graph_avoiding_duplicates(mock_path_lookup): paths = ["root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -186,7 +186,7 @@ def test_notebook_builds_cyclical_dependency_graph(mock_path_lookup): paths = ["cyclical1.run.py.txt", "cyclical2.run.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -199,7 +199,7 @@ def test_notebook_builds_python_dependency_graph(mock_path_lookup): paths = ["root4.py.txt", "leaf3.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) diff --git a/tests/unit/source_code/test_path_lookup_simulation.py b/tests/unit/source_code/test_path_lookup_simulation.py index 8b33813038..1e97bbc2ac 100644 --- a/tests/unit/source_code/test_path_lookup_simulation.py +++ b/tests/unit/source_code/test_path_lookup_simulation.py @@ -45,7 +45,7 @@ def test_locates_notebooks(source: list[str], expected: int, mock_path_lookup): WhitelistResolver(Whitelist()), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(notebook_path) assert not maybe.problems assert maybe.graph is not None @@ -73,7 +73,7 @@ def test_locates_files(source: list[str], expected: int): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_local_file_dependency_graph(file_path) assert not maybe.problems assert maybe.graph is not None @@ -114,7 +114,7 @@ def test_locates_notebooks_with_absolute_path(): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path) assert not maybe.problems assert maybe.graph is not None @@ -155,7 +155,7 @@ def func(): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path) assert not maybe.problems assert maybe.graph is not None diff --git a/tests/unit/source_code/test_s3fs.py b/tests/unit/source_code/test_s3fs.py index bab09d9011..311f768cd5 100644 --- a/tests/unit/source_code/test_s3fs.py +++ b/tests/unit/source_code/test_s3fs.py @@ -117,7 +117,7 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP file_loader = FileLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(sample) assert maybe.problems == [_.replace(source_path=sample) for _ in expected] @@ -145,7 +145,7 @@ def test_detect_s3fs_import_in_dependencies( file_loader = FileLoader() whitelist = Whitelist.parse(yml.read_text()) import_resolvers = [LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) sample = mock_path_lookup.cwd / "root9.py.txt" maybe = dependency_resolver.build_local_file_dependency_graph(sample) assert maybe.problems == expected From fe39eceb59dda4c9b9d5028e8e7157db67fa01d4 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:29:19 +0200 Subject: [PATCH 33/43] Add BaseLibraryResolver --- src/databricks/labs/ucx/source_code/graph.py | 21 ++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index b7b8cd607f..613f7d985b 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -248,6 +248,20 @@ def __repr__(self): return f"" +class BaseLibraryResolver(abc.ABC): + + def __init__(self, next_resolver: BaseLibraryResolver | None): + self._next_resolver = next_resolver + + @abc.abstractmethod + def with_next_resolver(self, resolver: BaseLibraryResolver) -> BaseLibraryResolver: + """required to create a linked list of resolvers""" + + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + assert self._next_resolver is not None + return self._next_resolver.resolve_library(path_lookup, library) + + class BaseNotebookResolver(abc.ABC): @abc.abstractmethod @@ -277,11 +291,6 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: assert self._next_resolver is not None return self._next_resolver.resolve_import(path_lookup, name) - def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: - # TODO: remove StubResolver and return MaybeDependency(None, [...]) - assert self._next_resolver is not None - return self._next_resolver.resolve_library(path_lookup, name) - class BaseFileResolver(abc.ABC): @@ -325,7 +334,7 @@ class DependencyResolver: def __init__( self, - library_resolvers, + library_resolvers: list[BaseLibraryResolver], notebook_resolver: BaseNotebookResolver, import_resolvers: list[BaseImportResolver], path_lookup: PathLookup, From 501f3f497e6f4ae1ad7cae6c008f96a33aac73bd Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:53:10 +0200 Subject: [PATCH 34/43] Add BaseLibraryResolver --- tests/unit/source_code/test_graph.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 33cb886490..0b648d2d52 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -1,7 +1,7 @@ from pathlib import Path from databricks.labs.ucx.source_code.files import FileLoader -from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, StubImportResolver +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, StubLibraryResolver from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader from databricks.labs.ucx.source_code.site_packages import PipResolver @@ -19,7 +19,7 @@ def test_dependency_graph_register_library(mock_path_lookup): def test_stub_import_resolver_fails_with_library_not_found_dependency_problem(mock_path_lookup): - resolver = StubImportResolver() + resolver = StubLibraryResolver() maybe = resolver.resolve_library(mock_path_lookup, "test") assert len(maybe.problems) == 1 From 05b05dd105c799fa5bddf3edbc49d791f203a583 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:53:22 +0200 Subject: [PATCH 35/43] Chain library resolvers --- src/databricks/labs/ucx/source_code/graph.py | 31 ++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 613f7d985b..5a048bf17c 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -299,6 +299,22 @@ def resolve_local_file(self, path_lookup, path: Path) -> MaybeDependency: """locates a file""" +class StubLibraryResolver(BaseLibraryResolver): + + def __init__(self): + super().__init__(None) + + def with_next_resolver(self, resolver: BaseLibraryResolver) -> BaseLibraryResolver: + raise NotImplementedError("Should never happen!") + + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + return self._fail('library-not-found', f"Could not resolve library: {library}") + + @staticmethod + def _fail(code: str, message: str): + return MaybeDependency(None, [DependencyProblem(code, message)]) + + class StubImportResolver(BaseImportResolver): def __init__(self): @@ -310,9 +326,6 @@ def with_next_resolver(self, resolver: BaseImportResolver) -> BaseImportResolver def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._fail('import-not-found', f"Could not locate import: {name}") - def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: - return self._fail('library-not-found', f"Could not resolve library: {name}") - @staticmethod def _fail(code: str, message: str): return MaybeDependency(None, [DependencyProblem(code, message)]) @@ -339,11 +352,19 @@ def __init__( import_resolvers: list[BaseImportResolver], path_lookup: PathLookup, ): - self._library_resolvers = library_resolvers + self._library_resolver = self._chain_library_resolvers(library_resolvers) self._notebook_resolver = notebook_resolver self._import_resolver = self._chain_import_resolvers(import_resolvers) self._path_lookup = path_lookup + @staticmethod + def _chain_library_resolvers(library_resolvers: list[BaseLibraryResolver]) -> BaseLibraryResolver: + previous: BaseLibraryResolver = StubLibraryResolver() + for resolver in library_resolvers: + resolver = resolver.with_next_resolver(previous) + previous = resolver + return previous + @staticmethod def _chain_import_resolvers(import_resolvers: list[BaseImportResolver]) -> BaseImportResolver: previous: BaseImportResolver = StubImportResolver() @@ -371,7 +392,7 @@ def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDepende if library.endswith(".txt"): problem = DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented") return MaybeDependency(None, [problem]) - return self._import_resolver.resolve_library(path_lookup, library) + return self._library_resolver.resolve_library(path_lookup, library) def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. From a1b2807c6879a2263d8df277d9ca944dbd7cf8f8 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:54:08 +0200 Subject: [PATCH 36/43] Use library resolver for PipResolver --- src/databricks/labs/ucx/source_code/site_packages.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 5a7210c785..08dfecf820 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -7,20 +7,20 @@ from pathlib import Path from subprocess import CalledProcessError -from databricks.labs.ucx.source_code.graph import BaseImportResolver, DependencyProblem, MaybeDependency +from databricks.labs.ucx.source_code.graph import BaseLibraryResolver, DependencyProblem, MaybeDependency from databricks.labs.ucx.source_code.path_lookup import PathLookup -class PipResolver(BaseImportResolver): +class PipResolver(BaseLibraryResolver): # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies # TODO: https://github.com/databrickslabs/ucx/issues/1642 # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 - def __init__(self, next_resolver: BaseImportResolver | None = None) -> None: + def __init__(self, next_resolver: BaseLibraryResolver | None = None) -> None: super().__init__(next_resolver) - def with_next_resolver(self, resolver: BaseImportResolver) -> PipResolver: + def with_next_resolver(self, resolver: BaseLibraryResolver) -> PipResolver: return PipResolver(resolver) def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: From 45ede0ccf14c378e544cc4a3cab53c77f1eabd9e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 09:57:39 +0200 Subject: [PATCH 37/43] Move not-yet-implemented to register_library --- src/databricks/labs/ucx/source_code/graph.py | 12 -------- src/databricks/labs/ucx/source_code/jobs.py | 30 ++++++++++++-------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 5a048bf17c..7887d9a421 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -380,18 +380,6 @@ def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._import_resolver.resolve_import(path_lookup, name) def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: - if library.endswith(".jar"): - problem = DependencyProblem("not-yet-implemented", "Jar library is not yet implemented") - return MaybeDependency(None, [problem]) - if library.endswith(".egg"): - problem = DependencyProblem("not-yet-implemented", "Egg library is not yet implemented") - return MaybeDependency(None, [problem]) - if library.endswith(".whl"): - problem = DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented") - return MaybeDependency(None, [problem]) - if library.endswith(".txt"): - problem = DependencyProblem("not-yet-implemented", "Requirements library is not yet implemented") - return MaybeDependency(None, [problem]) return self._library_resolver.resolve_library(path_lookup, library) def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index cb486abdc3..1cf874612c 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -84,19 +84,25 @@ def _register_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProb @staticmethod def _register_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: - libraries = ( - library.pypi.package if library.pypi else None, # TODO: https://github.com/databrickslabs/ucx/issues/1642 - library.jar, # TODO: https://github.com/databrickslabs/ucx/issues/1641 - library.egg, # TODO: https://github.com/databrickslabs/ucx/issues/1643 - library.whl, # TODO: https://github.com/databrickslabs/ucx/issues/1640 - library.requirements, # TODO: https://github.com/databrickslabs/ucx/issues/1644 - ) - for lib in libraries: - if lib is None: - continue - problems = graph.register_library(lib) - if len(problems) > 0: + if library.pypi: + # TODO: https://github.com/databrickslabs/ucx/issues/1642 + problems = graph.register_library(library.pypi.package) + if problems: yield from problems + if library.jar: + # TODO: https://github.com/databrickslabs/ucx/issues/1641 + yield DependencyProblem('not-yet-implemented', 'Jar library is not yet implemented') + if library.egg: + # TODO: https://github.com/databrickslabs/ucx/issues/1643 + yield DependencyProblem("not-yet-implemented", "Egg library is not yet implemented") + if library.whl: + # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path + # TODO: https://github.com/databrickslabs/ucx/issues/1640 + yield DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented") + if library.requirements: + # TODO: download and add every entry via graph.register_library + # TODO: https://github.com/databrickslabs/ucx/issues/1644 + yield DependencyProblem('not-yet-implemented', 'Requirements library is not yet implemented') def _register_notebook(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.notebook_task: From 7b90413d2ebc45197533f99b38449e12a8a12481 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:02:32 +0200 Subject: [PATCH 38/43] Fix issues --- src/databricks/labs/ucx/source_code/site_packages.py | 6 +++--- tests/unit/source_code/notebooks/test_cells.py | 4 ++-- tests/unit/source_code/test_graph.py | 2 +- tests/unit/source_code/test_jobs.py | 5 +---- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index 08dfecf820..ecfe5ecc5d 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -23,14 +23,14 @@ def __init__(self, next_resolver: BaseLibraryResolver | None = None) -> None: def with_next_resolver(self, resolver: BaseLibraryResolver) -> PipResolver: return PipResolver(resolver) - def resolve_library(self, path_lookup: PathLookup, name: str) -> MaybeDependency: + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: """Pip install library and augment path look-up to resolve the library at import""" # invoke pip install via subprocess to install this library into lib_install_folder - pip_install_arguments = ["pip", "install", name, "-t", self._temporary_virtual_environment.as_posix()] + pip_install_arguments = ["pip", "install", library, "-t", self._temporary_virtual_environment.as_posix()] try: subprocess.run(pip_install_arguments, check=True) except CalledProcessError as e: - problem = DependencyProblem("library-install-failed", f"Failed to install {name}: {e}") + problem = DependencyProblem("library-install-failed", f"Failed to install {library}: {e}") return MaybeDependency(None, [problem]) path_lookup.append_path(self._temporary_virtual_environment) diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py index 2c7b471860..5c66b08a32 100644 --- a/tests/unit/source_code/notebooks/test_cells.py +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -59,7 +59,7 @@ def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_looku dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver([], notebook_resolver, [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], notebook_resolver, [], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) code = "%pip install unknown-library-name" @@ -76,7 +76,7 @@ def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lo dependency = Dependency(FileLoader(), Path("test")) notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver([], notebook_resolver, [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], notebook_resolver, [], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) code = "%pip install pytest" diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index 0b648d2d52..bb31c104f7 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -9,7 +9,7 @@ def test_dependency_graph_register_library(mock_path_lookup): """Install pytest using pip installer""" dependency = Dependency(FileLoader(), Path("test")) - dependency_resolver = DependencyResolver([], NotebookResolver(NotebookLoader()), [PipResolver()], mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], NotebookResolver(NotebookLoader()), [], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) problems = graph.register_library("pytest") diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index ca694dcc43..37a35674ef 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -15,10 +15,7 @@ @pytest.fixture def graph(mock_path_lookup) -> DependencyGraph: dependency = Dependency(FileLoader(), Path("test")) - notebook_loader = NotebookLoader() - notebook_resolver = NotebookResolver(notebook_loader) - import_resolver = PipResolver() - dependency_resolver = DependencyResolver([], notebook_resolver, [import_resolver], mock_path_lookup) + dependency_resolver = DependencyResolver([PipResolver()], NotebookResolver(NotebookLoader()), [], mock_path_lookup) dependency_graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) return dependency_graph From 8655a0e913b2e5f73e7c00c1343ffd281d6c0d89 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:03:42 +0200 Subject: [PATCH 39/43] Move PipResolver in application --- src/databricks/labs/ucx/contexts/application.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 0bd7b4c81e..1e8d6f6187 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -395,8 +395,9 @@ def file_resolver(self): @cached_property def dependency_resolver(self): - import_resolvers = [self.pip_resolver, self.file_resolver, self.whitelist_resolver] - return DependencyResolver([], self.notebook_resolver, import_resolvers, self.path_lookup) + import_resolvers = [self.file_resolver, self.whitelist_resolver] + library_resolvers = [self.pip_resolver] + return DependencyResolver(library_resolvers, self.notebook_resolver, import_resolvers, self.path_lookup) @cached_property def workflow_linter(self): From 34542567f0450d0980dd08bef7ffbe40ae6b7cb5 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:06:14 +0200 Subject: [PATCH 40/43] Update test names --- tests/unit/source_code/test_graph.py | 3 +-- tests/unit/source_code/test_jobs.py | 12 ++++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py index bb31c104f7..00aa6a6f45 100644 --- a/tests/unit/source_code/test_graph.py +++ b/tests/unit/source_code/test_graph.py @@ -6,8 +6,7 @@ from databricks.labs.ucx.source_code.site_packages import PipResolver -def test_dependency_graph_register_library(mock_path_lookup): - """Install pytest using pip installer""" +def test_dependency_graph_registers_library(mock_path_lookup): dependency = Dependency(FileLoader(), Path("test")) dependency_resolver = DependencyResolver([PipResolver()], NotebookResolver(NotebookLoader()), [], mock_path_lookup) graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py index 37a35674ef..d6cf50ae75 100644 --- a/tests/unit/source_code/test_jobs.py +++ b/tests/unit/source_code/test_jobs.py @@ -20,8 +20,7 @@ def graph(mock_path_lookup) -> DependencyGraph: return dependency_graph -def test_workflow_task_container_build_dependency_graph_not_yet_implemented(mock_path_lookup, graph): - """Receive not yet implemented problems""" +def test_workflow_task_container_builds_dependency_graph_not_yet_implemented(mock_path_lookup, graph): # Goal of test is to raise test coverage, remove after implementing ws = create_autospec(WorkspaceClient) library = compute.Library(jar="library.jar", egg="library.egg", whl="libary.whl", requirements="requirements.txt") @@ -35,8 +34,7 @@ def test_workflow_task_container_build_dependency_graph_not_yet_implemented(mock ws.assert_not_called() -def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_lookup, graph): - """No dependency problems with empty task""" +def test_workflow_task_container_builds_dependency_graph_empty_task(mock_path_lookup, graph): ws = create_autospec(WorkspaceClient) task = jobs.Task(task_key="test") @@ -47,8 +45,7 @@ def test_workflow_task_container_build_dependency_graph_empty_task(mock_path_loo ws.assert_not_called() -def test_workflow_task_container_build_dependency_graph_pytest_pypi_library(mock_path_lookup, graph): - """Install pytest PyPi library""" +def test_workflow_task_container_builds_dependency_graph_pytest_pypi_library(mock_path_lookup, graph): ws = create_autospec(WorkspaceClient) libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="pytest"))] task = jobs.Task(task_key="test", libraries=libraries) @@ -61,8 +58,7 @@ def test_workflow_task_container_build_dependency_graph_pytest_pypi_library(mock ws.assert_not_called() -def test_workflow_task_container_build_dependency_graph_unknown_pypi_library(mock_path_lookup, graph): - """Install unknown PyPi library""" +def test_workflow_task_container_builds_dependency_graph_unknown_pypi_library(mock_path_lookup, graph): ws = create_autospec(WorkspaceClient) libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="unknown-library-name"))] task = jobs.Task(task_key="test", libraries=libraries) From fb407118af453207f216cab399737aee499d9650 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:08:50 +0200 Subject: [PATCH 41/43] Put back TODO's --- src/databricks/labs/ucx/source_code/graph.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 7887d9a421..03151f79c8 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -46,6 +46,9 @@ def path(self): return self._dependency.path def register_library(self, library: str) -> list[DependencyProblem]: + # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies + # TODO: https://github.com/databrickslabs/ucx/issues/1643 + # TODO: https://github.com/databrickslabs/ucx/issues/1640 maybe = self._resolver.resolve_library(self.path_lookup, library) if not maybe.dependency: return maybe.problems From 661d2a37e00aa8bdcc2a75028d14d546197118a1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:12:00 +0200 Subject: [PATCH 42/43] Update test names --- tests/unit/source_code/test_site_packages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 66b2fcd679..b6920bea33 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -4,7 +4,7 @@ from tests.unit import locate_site_packages -def test_pip_resolver_library(mock_path_lookup): +def test_pip_resolver_resolves_library(mock_path_lookup): pip_installer = PipResolver() maybe = pip_installer.resolve_library(mock_path_lookup, "pytest") @@ -12,7 +12,7 @@ def test_pip_resolver_library(mock_path_lookup): assert mock_path_lookup.resolve(Path("pytest")).exists() -def test_pip_resolver_unknown_library(mock_path_lookup): +def test_pip_resolver_does_not_resolve_unknown_library(mock_path_lookup): pip_installer = PipResolver() maybe = pip_installer.resolve_library(mock_path_lookup, "unknown-library-name") From d979f746e9d5773c537e3514cefa1a544263e7b0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Thu, 16 May 2024 10:46:46 +0200 Subject: [PATCH 43/43] Add todo --- src/databricks/labs/ucx/source_code/graph.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index 03151f79c8..93cb31e4e4 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -47,6 +47,7 @@ def path(self): def register_library(self, library: str) -> list[DependencyProblem]: # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies + # TODO: https://github.com/databrickslabs/ucx/issues/1642 # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 maybe = self._resolver.resolve_library(self.path_lookup, library)