Skip to content

Commit

Permalink
Add pip cell installs to dependency graph (#1698)
Browse files Browse the repository at this point in the history
## Changes
Logic for registering import installed in a pip cell

### Linked issues
#1642
Follow up on #1692

### Functionality 

- [ ] added relevant user documentation
- [ ] added new CLI command
- [ ] modified existing command: `databricks labs ucx ...`
- [ ] added a new workflow
- [ ] modified existing workflow: `...`
- [ ] added a new table
- [ ] modified existing table: `...`

### Tests
<!-- How is this tested? Please see the checklist below and also
describe any other relevant tests -->

- [ ] manually tested
- [ ] added unit tests
- [ ] added integration tests
- [ ] verified on staging environment (screenshot attached)
  • Loading branch information
JCZuurmond authored May 15, 2024
1 parent c8fc07e commit 68ef91f
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 2 deletions.
1 change: 1 addition & 0 deletions src/databricks/labs/ucx/source_code/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph:
child_graph = DependencyGraph(dependency, self, self._resolver, self._path_lookup)
self._dependencies[dependency] = child_graph
container = dependency.load(self.path_lookup)
# TODO: Return either (child) graph OR problems
if not container:
problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path)
return MaybeGraph(child_graph, [problem])
Expand Down
13 changes: 11 additions & 2 deletions src/databricks/labs/ucx/source_code/notebooks/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,18 @@ def language(self):
def is_runnable(self) -> bool:
return True # TODO

def build_dependency_graph(self, _: DependencyGraph) -> list[DependencyProblem]:
def build_dependency_graph(self, graph: DependencyGraph) -> list[DependencyProblem]:
# TODO: https://github.com/databrickslabs/ucx/issues/1642
return []
# TODO: this is very basic code, we need to improve it
splits = self.original_code.split(' ')
if len(splits) < 3:
return [DependencyProblem("library-install-failed", f"Missing arguments in '{self.original_code}'")]
if splits[1] != "install":
return [DependencyProblem("library-install-failed", f"Unsupported %pip command: {splits[1]}")]
# TODO: we need to support different formats of the library name and etc
library = splits[2]

return graph.register_library(library)


class CellLanguage(Enum):
Expand Down
Empty file.
88 changes: 88 additions & 0 deletions tests/unit/source_code/notebooks/test_cells.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
from pathlib import Path
from unittest.mock import create_autospec

from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph, DependencyResolver
from databricks.labs.ucx.source_code.files import FileLoader
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, PipCell
from databricks.labs.ucx.source_code.notebooks.loaders import (
NotebookResolver,
NotebookLoader,
)
from databricks.labs.ucx.source_code.site_packages import PipResolver


def test_pip_cell_language_is_pip():
assert PipCell("code").language == CellLanguage.PIP


def test_pip_cell_build_dependency_graph_invokes_register_library():
graph = create_autospec(DependencyGraph)

code = "%pip install databricks"
cell = PipCell(code)

problems = cell.build_dependency_graph(graph)

assert len(problems) == 0
graph.register_library.assert_called_once_with("databricks")


def test_pip_cell_build_dependency_graph_pip_registers_missing_library():
graph = create_autospec(DependencyGraph)

code = "%pip install"
cell = PipCell(code)

problems = cell.build_dependency_graph(graph)

assert len(problems) == 1
assert problems[0].code == "library-install-failed"
assert problems[0].message == "Missing arguments in '%pip install'"
graph.register_library.assert_not_called()


def test_pip_cell_build_dependency_graph_reports_incorrect_syntax():
graph = create_autospec(DependencyGraph)

code = "%pip installl pytest" # typo on purpose
cell = PipCell(code)

problems = cell.build_dependency_graph(graph)

assert len(problems) == 1
assert problems[0].code == "library-install-failed"
assert problems[0].message == "Unsupported %pip command: installl"
graph.register_library.assert_not_called()


def test_pip_cell_build_dependency_graph_reports_unknown_library(mock_path_lookup):
dependency = Dependency(FileLoader(), Path("test"))
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup)
graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup)

code = "%pip install unknown-library-name"
cell = PipCell(code)

problems = cell.build_dependency_graph(graph)

assert len(problems) == 1
assert problems[0].code == "library-install-failed"
assert problems[0].message.startswith("Failed to install unknown-library-name")


def test_pip_cell_build_dependency_graph_resolves_installed_library(mock_path_lookup):
dependency = Dependency(FileLoader(), Path("test"))
notebook_loader = NotebookLoader()
notebook_resolver = NotebookResolver(notebook_loader)
dependency_resolver = DependencyResolver(notebook_resolver, [PipResolver()], mock_path_lookup)
graph = DependencyGraph(dependency, None, dependency_resolver, mock_path_lookup)

code = "%pip install pytest"
cell = PipCell(code)

problems = cell.build_dependency_graph(graph)

assert len(problems) == 0
assert graph.path_lookup.resolve(Path("pytest")).exists()

0 comments on commit 68ef91f

Please sign in to comment.