diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index f2fc31913b..1e8d6f6187 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -43,7 +43,7 @@ from databricks.labs.ucx.source_code.path_lookup import PathLookup from databricks.labs.ucx.source_code.graph import DependencyResolver from databricks.labs.ucx.source_code.whitelist import WhitelistResolver, Whitelist -from databricks.labs.ucx.source_code.site_packages import SitePackages +from databricks.labs.ucx.source_code.site_packages import PipResolver, SitePackages from databricks.labs.ucx.source_code.languages import Languages from databricks.labs.ucx.source_code.redash import Redash from databricks.labs.ucx.workspace_access import generic, redash @@ -350,6 +350,10 @@ def workspace_info(self): def verify_has_metastore(self): return VerifyHasMetastore(self.workspace_client) + @cached_property + def pip_resolver(self): + return PipResolver() + @cached_property def notebook_loader(self) -> NotebookLoader: return NotebookLoader() @@ -392,7 +396,8 @@ def file_resolver(self): @cached_property def dependency_resolver(self): import_resolvers = [self.file_resolver, self.whitelist_resolver] - return DependencyResolver(self.notebook_resolver, import_resolvers, self.path_lookup) + library_resolvers = [self.pip_resolver] + return DependencyResolver(library_resolvers, self.notebook_resolver, import_resolvers, self.path_lookup) @cached_property def workflow_linter(self): diff --git a/src/databricks/labs/ucx/source_code/graph.py b/src/databricks/labs/ucx/source_code/graph.py index efeff2391b..93cb31e4e4 100644 --- a/src/databricks/labs/ucx/source_code/graph.py +++ b/src/databricks/labs/ucx/source_code/graph.py @@ -45,12 +45,16 @@ def dependency(self): def path(self): return self._dependency.path - def register_library(self, name: str) -> MaybeGraph: + def register_library(self, library: str) -> list[DependencyProblem]: # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies # TODO: https://github.com/databrickslabs/ucx/issues/1642 # TODO: https://github.com/databrickslabs/ucx/issues/1643 # TODO: https://github.com/databrickslabs/ucx/issues/1640 - return MaybeGraph(None, [DependencyProblem('not-yet-implemented', f'Library dependency: {name}')]) + maybe = self._resolver.resolve_library(self.path_lookup, library) + if not maybe.dependency: + return maybe.problems + maybe_graph = self.register_dependency(maybe.dependency) + return maybe_graph.problems def register_notebook(self, path: Path) -> list[DependencyProblem]: maybe = self._resolver.resolve_notebook(self.path_lookup, path) @@ -80,6 +84,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph: child_graph = DependencyGraph(dependency, self, self._resolver, self._path_lookup) self._dependencies[dependency] = child_graph container = dependency.load(self.path_lookup) + # TODO: Return either (child) graph OR problems if not container: problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path) return MaybeGraph(child_graph, [problem]) @@ -247,6 +252,20 @@ def __repr__(self): return f"" +class BaseLibraryResolver(abc.ABC): + + def __init__(self, next_resolver: BaseLibraryResolver | None): + self._next_resolver = next_resolver + + @abc.abstractmethod + def with_next_resolver(self, resolver: BaseLibraryResolver) -> BaseLibraryResolver: + """required to create a linked list of resolvers""" + + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + assert self._next_resolver is not None + return self._next_resolver.resolve_library(path_lookup, library) + + class BaseNotebookResolver(abc.ABC): @abc.abstractmethod @@ -284,6 +303,22 @@ def resolve_local_file(self, path_lookup, path: Path) -> MaybeDependency: """locates a file""" +class StubLibraryResolver(BaseLibraryResolver): + + def __init__(self): + super().__init__(None) + + def with_next_resolver(self, resolver: BaseLibraryResolver) -> BaseLibraryResolver: + raise NotImplementedError("Should never happen!") + + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + return self._fail('library-not-found', f"Could not resolve library: {library}") + + @staticmethod + def _fail(code: str, message: str): + return MaybeDependency(None, [DependencyProblem(code, message)]) + + class StubImportResolver(BaseImportResolver): def __init__(self): @@ -316,14 +351,24 @@ class DependencyResolver: def __init__( self, + library_resolvers: list[BaseLibraryResolver], notebook_resolver: BaseNotebookResolver, import_resolvers: list[BaseImportResolver], path_lookup: PathLookup, ): + self._library_resolver = self._chain_library_resolvers(library_resolvers) self._notebook_resolver = notebook_resolver self._import_resolver = self._chain_import_resolvers(import_resolvers) self._path_lookup = path_lookup + @staticmethod + def _chain_library_resolvers(library_resolvers: list[BaseLibraryResolver]) -> BaseLibraryResolver: + previous: BaseLibraryResolver = StubLibraryResolver() + for resolver in library_resolvers: + resolver = resolver.with_next_resolver(previous) + previous = resolver + return previous + @staticmethod def _chain_import_resolvers(import_resolvers: list[BaseImportResolver]) -> BaseImportResolver: previous: BaseImportResolver = StubImportResolver() @@ -338,6 +383,9 @@ def resolve_notebook(self, path_lookup: PathLookup, path: Path) -> MaybeDependen def resolve_import(self, path_lookup: PathLookup, name: str) -> MaybeDependency: return self._import_resolver.resolve_import(path_lookup, name) + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + return self._library_resolver.resolve_library(path_lookup, library) + def build_local_file_dependency_graph(self, path: Path) -> MaybeGraph: """Builds a dependency graph starting from a file. This method is mainly intended for testing purposes. In case of problems, the paths in the problems will be relative to the starting path lookup.""" diff --git a/src/databricks/labs/ucx/source_code/jobs.py b/src/databricks/labs/ucx/source_code/jobs.py index ef161af0ad..1cf874612c 100644 --- a/src/databricks/labs/ucx/source_code/jobs.py +++ b/src/databricks/labs/ucx/source_code/jobs.py @@ -15,12 +15,12 @@ from databricks.labs.ucx.source_code.base import CurrentSessionState from databricks.labs.ucx.source_code.files import LocalFile from databricks.labs.ucx.source_code.graph import ( + Dependency, DependencyGraph, - SourceContainer, DependencyProblem, - Dependency, - WrappingLoader, DependencyResolver, + SourceContainer, + WrappingLoader, ) from databricks.labs.ucx.source_code.languages import Languages from databricks.labs.ucx.source_code.notebooks.sources import Notebook, NotebookLinter, FileLinter @@ -76,32 +76,29 @@ def _register_task_dependencies(self, graph: DependencyGraph) -> Iterable[Depend yield from self._register_existing_cluster_id(graph) yield from self._register_spark_submit_task(graph) - def _register_libraries(self, graph: DependencyGraph): + def _register_libraries(self, graph: DependencyGraph) -> Iterable[DependencyProblem]: if not self._task.libraries: return for library in self._task.libraries: - yield from self._lint_library(library, graph) + yield from self._register_library(graph, library) - def _lint_library(self, library: compute.Library, graph: DependencyGraph) -> Iterable[DependencyProblem]: + @staticmethod + def _register_library(graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]: if library.pypi: # TODO: https://github.com/databrickslabs/ucx/issues/1642 - maybe = graph.register_library(library.pypi.package) - if maybe.problems: - yield from maybe.problems + problems = graph.register_library(library.pypi.package) + if problems: + yield from problems if library.jar: # TODO: https://github.com/databrickslabs/ucx/issues/1641 yield DependencyProblem('not-yet-implemented', 'Jar library is not yet implemented') if library.egg: # TODO: https://github.com/databrickslabs/ucx/issues/1643 - maybe = graph.register_library(library.egg) - if maybe.problems: - yield from maybe.problems + yield DependencyProblem("not-yet-implemented", "Egg library is not yet implemented") if library.whl: # TODO: download the wheel somewhere local and add it to "virtual sys.path" via graph.path_lookup.push_path # TODO: https://github.com/databrickslabs/ucx/issues/1640 - maybe = graph.register_library(library.whl) - if maybe.problems: - yield from maybe.problems + yield DependencyProblem("not-yet-implemented", "Wheel library is not yet implemented") if library.requirements: # TODO: download and add every entry via graph.register_library # TODO: https://github.com/databrickslabs/ucx/issues/1644 diff --git a/src/databricks/labs/ucx/source_code/notebooks/cells.py b/src/databricks/labs/ucx/source_code/notebooks/cells.py index bdba2a4047..55ab649ad6 100644 --- a/src/databricks/labs/ucx/source_code/notebooks/cells.py +++ b/src/databricks/labs/ucx/source_code/notebooks/cells.py @@ -182,9 +182,18 @@ def language(self): def is_runnable(self) -> bool: return True # TODO - def build_dependency_graph(self, _: DependencyGraph) -> list[DependencyProblem]: + def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProblem]: # TODO: https://github.com/databrickslabs/ucx/issues/1642 - return [] + # TODO: this is very basic code, we need to improve it + splits = self.original_code.split(' ') + if len(splits) < 3: + return [DependencyProblem("library-install-failed", f"Missing arguments in '{self.original_code}'")] + if splits[1] != "install": + return [DependencyProblem("library-install-failed", f"Unsupported %pip command: {splits[1]}")] + # TODO: we need to support different formats of the library name and etc + library = splits[2] + + return graph.register_library(library) class CellLanguage(Enum): diff --git a/src/databricks/labs/ucx/source_code/site_packages.py b/src/databricks/labs/ucx/source_code/site_packages.py index f3fba67d87..ecfe5ecc5d 100644 --- a/src/databricks/labs/ucx/source_code/site_packages.py +++ b/src/databricks/labs/ucx/source_code/site_packages.py @@ -1,7 +1,50 @@ from __future__ import annotations import os +import subprocess +import tempfile +from functools import cached_property from pathlib import Path +from subprocess import CalledProcessError + +from databricks.labs.ucx.source_code.graph import BaseLibraryResolver, DependencyProblem, MaybeDependency +from databricks.labs.ucx.source_code.path_lookup import PathLookup + + +class PipResolver(BaseLibraryResolver): + # TODO: use DistInfoResolver to load wheel/egg/pypi dependencies + # TODO: https://github.com/databrickslabs/ucx/issues/1642 + # TODO: https://github.com/databrickslabs/ucx/issues/1643 + # TODO: https://github.com/databrickslabs/ucx/issues/1640 + + def __init__(self, next_resolver: BaseLibraryResolver | None = None) -> None: + super().__init__(next_resolver) + + def with_next_resolver(self, resolver: BaseLibraryResolver) -> PipResolver: + return PipResolver(resolver) + + def resolve_library(self, path_lookup: PathLookup, library: str) -> MaybeDependency: + """Pip install library and augment path look-up to resolve the library at import""" + # invoke pip install via subprocess to install this library into lib_install_folder + pip_install_arguments = ["pip", "install", library, "-t", self._temporary_virtual_environment.as_posix()] + try: + subprocess.run(pip_install_arguments, check=True) + except CalledProcessError as e: + problem = DependencyProblem("library-install-failed", f"Failed to install {library}: {e}") + return MaybeDependency(None, [problem]) + + path_lookup.append_path(self._temporary_virtual_environment) + return MaybeDependency(None, []) + + @cached_property + def _temporary_virtual_environment(self) -> Path: + # TODO: for `databricks labs ucx lint-local-code`, detect if we already have a virtual environment + # and use that one. See Databricks CLI code for the labs command to see how to detect the virtual + # environment. If we don't have a virtual environment, create a temporary one. + # simulate notebook-scoped virtual environment + lib_install_folder = tempfile.mkdtemp(prefix="ucx-") + return Path(lib_install_folder) + COMMENTED_OUT_FOR_PR_1685 = """ class SitePackageContainer(SourceContainer): @@ -42,6 +85,8 @@ def __init__(self, packages: list[SitePackage]): self._packages[top_level] = package def __getitem__(self, item: str) -> SitePackage | None: + # TODO: Replace hyphen with underscores + # TODO: Don't use get, raise KeyError return self._packages.get(item, None) diff --git a/tests/integration/contexts/test_global_context.py b/tests/integration/contexts/test_global_context.py deleted file mode 100644 index 99c2c2fb5b..0000000000 --- a/tests/integration/contexts/test_global_context.py +++ /dev/null @@ -1,9 +0,0 @@ -from databricks.labs.ucx.contexts.application import GlobalContext - - -def test_cached_properties(): - # purpose of the below is merely to improve test coverage - ctx = GlobalContext() - assert ctx.site_packages is not None - assert ctx.site_packages_path is not None - assert ctx.dependency_resolver is not None diff --git a/tests/integration/contexts/__init__.py b/tests/unit/contexts/__init__.py similarity index 100% rename from tests/integration/contexts/__init__.py rename to tests/unit/contexts/__init__.py diff --git a/tests/unit/contexts/test_application.py b/tests/unit/contexts/test_application.py new file mode 100644 index 0000000000..373b631374 --- /dev/null +++ b/tests/unit/contexts/test_application.py @@ -0,0 +1,12 @@ +import pytest + +from databricks.labs.ucx.contexts.application import GlobalContext + + +@pytest.mark.parametrize("attribute", ["dependency_resolver", "pip_resolver", "site_packages", "site_packages_path"]) +def test_global_context_attributes_not_none(attribute: str): + """Attributes should be not None""" + # Goal is to improve test coverage + ctx = GlobalContext() + assert hasattr(ctx, attribute) + assert getattr(ctx, attribute) is not None diff --git a/tests/unit/source_code/notebooks/__init__.py b/tests/unit/source_code/notebooks/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/unit/source_code/notebooks/test_cells.py b/tests/unit/source_code/notebooks/test_cells.py new file mode 100644 index 0000000000..5c66b08a32 --- /dev/null +++ b/tests/unit/source_code/notebooks/test_cells.py @@ -0,0 +1,88 @@ +from pathlib import Path +from unittest.mock import create_autospec + +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell +from databricks.labs.ucx.source_code.notebooks.loaders import ( + NotebookResolver, + NotebookLoader, +) +from databricks.labs.ucx.source_code.site_packages import PipResolver + + +def test_pip_cell_language_is_pip(): + assert PipCell("code").language == CellLanguage.PIP + + +def test_pip_cell_build_dependency_graph_invokes_register_library(): + graph = create_autospec(DependencyGraph) + + code = "%pip install databricks" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 0 + graph.register_library.assert_called_once_with("databricks") + + +def test_pip_cell_build_dependency_graph_pip_registers_missing_library(): + graph = create_autospec(DependencyGraph) + + code = "%pip install" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message == "Missing arguments in '%pip install'" + graph.register_library.assert_not_called() + + +def test_pip_cell_build_dependency_graph_reports_incorrect_syntax(): + graph = create_autospec(DependencyGraph) + + code = "%pip installl pytest" # typo on purpose + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message == "Unsupported %pip command: installl" + graph.register_library.assert_not_called() + + +def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_lookup): + dependency = Dependency(FileLoader(), Path("test")) + notebook_loader = NotebookLoader() + notebook_resolver = NotebookResolver(notebook_loader) + dependency_resolver = DependencyResolver([PipResolver()], notebook_resolver, [], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + + code = "%pip install unknown-library-name" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install unknown-library-name") + + +def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lookup): + dependency = Dependency(FileLoader(), Path("test")) + notebook_loader = NotebookLoader() + notebook_resolver = NotebookResolver(notebook_loader) + dependency_resolver = DependencyResolver([PipResolver()], notebook_resolver, [], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + + code = "%pip install pytest" + cell = PipCell(code) + + problems = cell.build_dependency_graph(graph) + + assert len(problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() diff --git a/tests/unit/source_code/test_dependencies.py b/tests/unit/source_code/test_dependencies.py index 55d89a43aa..399e162f86 100644 --- a/tests/unit/source_code/test_dependencies.py +++ b/tests/unit/source_code/test_dependencies.py @@ -23,7 +23,7 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root3.run.py")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"} @@ -32,7 +32,7 @@ def test_dependency_graph_builder_visits_workspace_notebook_dependencies(mock_pa def test_dependency_graph_builder_visits_local_notebook_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root4.py.txt", "leaf3.py.txt"} @@ -47,7 +47,7 @@ def test_dependency_graph_builder_visits_workspace_file_dependencies(mock_path_l WhitelistResolver(whi), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path('./root8.py')) assert not maybe.failed assert maybe.graph.all_relative_names() == {'leaf1.py.txt', 'leaf2.py.txt', 'root8.py.txt'} @@ -62,7 +62,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook WhitelistResolver(whi), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root1-no-leaf.run.py")) assert list(maybe.problems) == [ DependencyProblem( @@ -80,7 +80,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_workspace_notebook def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dependency(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root4-no-leaf.py")) assert list(maybe.problems) == [ DependencyProblem( @@ -92,7 +92,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_local_notebook_dep def test_dependency_graph_builder_raises_problem_with_non_constant_local_notebook_dependency(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path('root10.py.txt')) assert list(maybe.problems) == [ DependencyProblem( @@ -110,7 +110,7 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_local_noteboo def test_dependency_graph_builder_raises_problem_with_invalid_run_cell(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path('leaf6.py.txt')) assert list(maybe.problems) == [ DependencyProblem('invalid-run-cell', 'Missing notebook path in %run command', Path('leaf6.py.txt'), 5, 0, 5, 4) @@ -121,7 +121,7 @@ def test_dependency_graph_builder_visits_recursive_file_dependencies(mock_path_l notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("root6.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root6.py.txt", "root5.py.txt", "leaf4.py.txt"} @@ -131,7 +131,7 @@ def test_dependency_graph_builder_raises_problem_with_unresolved_import(mock_pat notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path('root7.py.txt')) assert list(maybe.problems) == [ DependencyProblem( @@ -144,7 +144,7 @@ def test_dependency_graph_builder_raises_problem_with_non_constant_notebook_argu notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("run_notebooks.py.txt")) assert list(maybe.problems) == [ DependencyProblem( @@ -163,7 +163,7 @@ def test_dependency_graph_builder_visits_file_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("root5.py.txt")) assert not maybe.failed assert maybe.graph.all_relative_names() == {"root5.py.txt", "leaf4.py.txt"} @@ -173,7 +173,7 @@ def test_dependency_graph_builder_skips_builtin_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert not maybe.failed graph = maybe.graph @@ -187,7 +187,7 @@ def test_dependency_graph_builder_ignores_known_dependencies(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [WhitelistResolver(Whitelist()), LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("python_builtins.py.txt")) assert maybe.graph graph = maybe.graph @@ -204,7 +204,7 @@ def test_dependency_graph_builder_visits_site_packages(empty_index, mock_noteboo WhitelistResolver(Whitelist()), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-site-package.py.txt")) assert not maybe.failed graph = maybe.graph @@ -228,7 +228,7 @@ def test_dependency_graph_builder_resolves_sub_site_package(): LocalFileResolver(file_loader), WhitelistResolver(whitelist), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert maybe.graph maybe = maybe.graph.locate_dependency(Path(site_packages_path, "databricks", "labs", "lsql", "core.py")) @@ -237,7 +237,7 @@ def test_dependency_graph_builder_resolves_sub_site_package(): def test_dependency_graph_builder_raises_problem_with_unfound_root_file(mock_path_lookup, mock_notebook_resolver): import_resolvers = [LocalFileResolver(FileLoader())] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("non-existing.py.txt")) assert list(maybe.problems) == [ DependencyProblem('file-not-found', 'File not found: non-existing.py.txt', Path("non-existing.py.txt")) @@ -247,7 +247,7 @@ def test_dependency_graph_builder_raises_problem_with_unfound_root_file(mock_pat def test_dependency_graph_builder_raises_problem_with_unfound_root_notebook(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("unknown_notebook")) assert list(maybe.problems) == [ DependencyProblem('notebook-not-found', 'Notebook not found: unknown_notebook', Path("unknown_notebook")) @@ -262,7 +262,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So file_loader = FailingFileLoader() import_resolvers = [LocalFileResolver(file_loader)] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert list(maybe.problems) == [ DependencyProblem( @@ -279,7 +279,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So notebook_loader = FailingNotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(Path("root5.py.txt")) assert list(maybe.problems) == [ DependencyProblem('cannot-load-notebook', 'Could not load notebook root5.py.txt', Path('')) @@ -287,7 +287,7 @@ def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> So def test_dependency_graph_builder_raises_problem_with_missing_file_loader(mock_notebook_resolver, mock_path_lookup): - dependency_resolver = DependencyResolver(mock_notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(Path("import-sub-site-package.py.txt")) assert list(maybe.problems) == [ DependencyProblem('missing-file-resolver', 'Missing resolver for local files', Path('')) @@ -296,5 +296,5 @@ def test_dependency_graph_builder_raises_problem_with_missing_file_loader(mock_n def test_dependency_graph_builder_repr(mock_notebook_resolver, mock_path_lookup): # improve test coverage - dependency_resolver = DependencyResolver(mock_notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, [], mock_path_lookup) assert len(repr(dependency_resolver)) > 0 diff --git a/tests/unit/source_code/test_graph.py b/tests/unit/source_code/test_graph.py new file mode 100644 index 0000000000..00aa6a6f45 --- /dev/null +++ b/tests/unit/source_code/test_graph.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver, StubLibraryResolver +from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader +from databricks.labs.ucx.source_code.site_packages import PipResolver + + +def test_dependency_graph_registers_library(mock_path_lookup): + dependency = Dependency(FileLoader(), Path("test")) + dependency_resolver = DependencyResolver([PipResolver()], NotebookResolver(NotebookLoader()), [], mock_path_lookup) + graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + + problems = graph.register_library("pytest") + + assert len(problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() + + +def test_stub_import_resolver_fails_with_library_not_found_dependency_problem(mock_path_lookup): + resolver = StubLibraryResolver() + maybe = resolver.resolve_library(mock_path_lookup, "test") + + assert len(maybe.problems) == 1 + assert maybe.problems[0].code == "library-not-found" diff --git a/tests/unit/source_code/test_jobs.py b/tests/unit/source_code/test_jobs.py new file mode 100644 index 0000000000..d6cf50ae75 --- /dev/null +++ b/tests/unit/source_code/test_jobs.py @@ -0,0 +1,73 @@ +from pathlib import Path +from unittest.mock import create_autospec + +import pytest +from databricks.sdk import WorkspaceClient +from databricks.sdk.service import compute, jobs + +from databricks.labs.ucx.source_code.files import FileLoader +from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver +from databricks.labs.ucx.source_code.jobs import WorkflowTaskContainer +from databricks.labs.ucx.source_code.notebooks.loaders import NotebookResolver, NotebookLoader +from databricks.labs.ucx.source_code.site_packages import PipResolver + + +@pytest.fixture +def graph(mock_path_lookup) -> DependencyGraph: + dependency = Dependency(FileLoader(), Path("test")) + dependency_resolver = DependencyResolver([PipResolver()], NotebookResolver(NotebookLoader()), [], mock_path_lookup) + dependency_graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup) + return dependency_graph + + +def test_workflow_task_container_builds_dependency_graph_not_yet_implemented(mock_path_lookup, graph): + # Goal of test is to raise test coverage, remove after implementing + ws = create_autospec(WorkspaceClient) + library = compute.Library(jar="library.jar", egg="library.egg", whl="libary.whl", requirements="requirements.txt") + task = jobs.Task(task_key="test", libraries=[library], existing_cluster_id="id") + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 5 + assert all(problem.code == "not-yet-implemented" for problem in problems) + ws.assert_not_called() + + +def test_workflow_task_container_builds_dependency_graph_empty_task(mock_path_lookup, graph): + ws = create_autospec(WorkspaceClient) + task = jobs.Task(task_key="test") + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 0 + ws.assert_not_called() + + +def test_workflow_task_container_builds_dependency_graph_pytest_pypi_library(mock_path_lookup, graph): + ws = create_autospec(WorkspaceClient) + libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="pytest"))] + task = jobs.Task(task_key="test", libraries=libraries) + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 0 + assert graph.path_lookup.resolve(Path("pytest")).exists() + ws.assert_not_called() + + +def test_workflow_task_container_builds_dependency_graph_unknown_pypi_library(mock_path_lookup, graph): + ws = create_autospec(WorkspaceClient) + libraries = [compute.Library(pypi=compute.PythonPyPiLibrary(package="unknown-library-name"))] + task = jobs.Task(task_key="test", libraries=libraries) + + workflow_task_container = WorkflowTaskContainer(ws, task) + problems = workflow_task_container.build_dependency_graph(graph) + + assert len(problems) == 1 + assert problems[0].code == "library-install-failed" + assert problems[0].message.startswith("Failed to install unknown-library-name") + assert mock_path_lookup.resolve(Path("unknown-library-name")) is None + ws.assert_not_called() diff --git a/tests/unit/source_code/test_notebook.py b/tests/unit/source_code/test_notebook.py index 4c49354335..d4ecebc99f 100644 --- a/tests/unit/source_code/test_notebook.py +++ b/tests/unit/source_code/test_notebook.py @@ -128,7 +128,7 @@ def test_notebook_generates_runnable_cells(source: tuple[str, Language, list[str def test_notebook_builds_leaf_dependency_graph(mock_path_lookup): notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path("leaf1.py.txt")) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -146,7 +146,7 @@ def test_notebook_builds_depth1_dependency_graph(mock_path_lookup): paths = ["root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -159,7 +159,7 @@ def test_notebook_builds_depth2_dependency_graph(mock_path_lookup): paths = ["root2.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -172,7 +172,7 @@ def test_notebook_builds_dependency_graph_avoiding_duplicates(mock_path_lookup): paths = ["root3.run.py.txt", "root1.run.py.txt", "leaf1.py.txt", "leaf2.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -186,7 +186,7 @@ def test_notebook_builds_cyclical_dependency_graph(mock_path_lookup): paths = ["cyclical1.run.py.txt", "cyclical2.run.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) @@ -199,7 +199,7 @@ def test_notebook_builds_python_dependency_graph(mock_path_lookup): paths = ["root4.py.txt", "leaf3.py.txt"] notebook_loader = NotebookLoader() notebook_resolver = NotebookResolver(notebook_loader) - dependency_resolver = DependencyResolver(notebook_resolver, [], mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, [], mock_path_lookup) maybe = dependency_resolver.resolve_notebook(mock_path_lookup, Path(paths[0])) graph = DependencyGraph(maybe.dependency, None, dependency_resolver, mock_path_lookup) container = maybe.dependency.load(mock_path_lookup) diff --git a/tests/unit/source_code/test_path_lookup_simulation.py b/tests/unit/source_code/test_path_lookup_simulation.py index 8b33813038..1e97bbc2ac 100644 --- a/tests/unit/source_code/test_path_lookup_simulation.py +++ b/tests/unit/source_code/test_path_lookup_simulation.py @@ -45,7 +45,7 @@ def test_locates_notebooks(source: list[str], expected: int, mock_path_lookup): WhitelistResolver(Whitelist()), LocalFileResolver(file_loader), ] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_notebook_dependency_graph(notebook_path) assert not maybe.problems assert maybe.graph is not None @@ -73,7 +73,7 @@ def test_locates_files(source: list[str], expected: int): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_local_file_dependency_graph(file_path) assert not maybe.problems assert maybe.graph is not None @@ -114,7 +114,7 @@ def test_locates_notebooks_with_absolute_path(): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path) assert not maybe.problems assert maybe.graph is not None @@ -155,7 +155,7 @@ def func(): WhitelistResolver(whitelist), LocalFileResolver(file_loader), ] - resolver = DependencyResolver(notebook_resolver, import_resolvers, lookup) + resolver = DependencyResolver([], notebook_resolver, import_resolvers, lookup) maybe = resolver.build_notebook_dependency_graph(parent_file_path) assert not maybe.problems assert maybe.graph is not None diff --git a/tests/unit/source_code/test_s3fs.py b/tests/unit/source_code/test_s3fs.py index bab09d9011..311f768cd5 100644 --- a/tests/unit/source_code/test_s3fs.py +++ b/tests/unit/source_code/test_s3fs.py @@ -117,7 +117,7 @@ def test_detect_s3fs_import(empty_index, source: str, expected: list[DependencyP file_loader = FileLoader() notebook_resolver = NotebookResolver(notebook_loader) import_resolvers = [LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], notebook_resolver, import_resolvers, mock_path_lookup) maybe = dependency_resolver.build_local_file_dependency_graph(sample) assert maybe.problems == [_.replace(source_path=sample) for _ in expected] @@ -145,7 +145,7 @@ def test_detect_s3fs_import_in_dependencies( file_loader = FileLoader() whitelist = Whitelist.parse(yml.read_text()) import_resolvers = [LocalFileResolver(file_loader), WhitelistResolver(whitelist)] - dependency_resolver = DependencyResolver(mock_notebook_resolver, import_resolvers, mock_path_lookup) + dependency_resolver = DependencyResolver([], mock_notebook_resolver, import_resolvers, mock_path_lookup) sample = mock_path_lookup.cwd / "root9.py.txt" maybe = dependency_resolver.build_local_file_dependency_graph(sample) assert maybe.problems == expected diff --git a/tests/unit/source_code/test_site_packages.py b/tests/unit/source_code/test_site_packages.py index 1a5eaaaa51..b6920bea33 100644 --- a/tests/unit/source_code/test_site_packages.py +++ b/tests/unit/source_code/test_site_packages.py @@ -1,7 +1,27 @@ -from databricks.labs.ucx.source_code.site_packages import SitePackages +from pathlib import Path + +from databricks.labs.ucx.source_code.site_packages import PipResolver, SitePackages from tests.unit import locate_site_packages +def test_pip_resolver_resolves_library(mock_path_lookup): + pip_installer = PipResolver() + maybe = pip_installer.resolve_library(mock_path_lookup, "pytest") + + assert len(maybe.problems) == 0 + assert mock_path_lookup.resolve(Path("pytest")).exists() + + +def test_pip_resolver_does_not_resolve_unknown_library(mock_path_lookup): + pip_installer = PipResolver() + maybe = pip_installer.resolve_library(mock_path_lookup, "unknown-library-name") + + assert len(maybe.problems) == 1 + assert maybe.problems[0].code == "library-install-failed" + assert maybe.problems[0].message.startswith("Failed to install unknown-library-name") + assert mock_path_lookup.resolve(Path("unknown-library-name")) is None + + def test_reads_site_packages(): site_packages_path = locate_site_packages() site_packages = SitePackages.parse(site_packages_path)