Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a new command databricks labs ucx lint-local-code #1594

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,9 @@ commands:
- name: migrate-local-code
description: (Experimental) Migrate files in the current directory to be more compatible with Unity Catalog.

- name: lint-local-code
description: (Experimental) Lint files in the current directory to highlight incompatibilities with Unity Catalog.

- name: show-all-metastores
is_account_level: true
description: Show all metastores available in the same region as the specified workspace
Expand Down
16 changes: 14 additions & 2 deletions src/databricks/labs/ucx/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from databricks.sdk.errors import NotFound

from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.contexts.cli_command import AccountContext, WorkspaceContext
from databricks.labs.ucx.contexts.cli_command import AccountContext, WorkspaceContext, LocalContext
from databricks.labs.ucx.hive_metastore.tables import What

ucx = App(__file__)
Expand Down Expand Up @@ -387,7 +387,7 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
@ucx.command
def migrate_local_code(w: WorkspaceClient, prompts: Prompts):
"""Fix the code files based on their language."""
ctx = WorkspaceContext(w)
ctx = LocalContext(w)
working_directory = Path.cwd()
if not prompts.confirm("Do you want to apply UC migration to all files in the current directory?"):
return
Expand Down Expand Up @@ -437,5 +437,17 @@ def migrate_tables(w: WorkspaceClient, prompts: Prompts, *, ctx: WorkspaceContex
deployed_workflows.run_workflow("migrate-external-hiveserde-tables-in-place-experimental")


@ucx.command
def lint_local_code(w: WorkspaceClient, ctx: LocalContext | None = None):
"""Lint local code files looking for problems in notebooks and python files."""
if ctx is None:
# TODO Add a local dependency builder to localcontext rather than Workspace, once Eric's PR hits
# Right now it is using the workspace loader to prove it all works but obviously will not find notebooks
# in the workspace
ctx = LocalContext(w)
working_directory = Path.cwd()
ctx.local_file_linter.lint(working_directory)


if __name__ == "__main__":
ucx()
19 changes: 14 additions & 5 deletions src/databricks/labs/ucx/contexts/cli_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources
from databricks.labs.ucx.contexts.application import GlobalContext
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader, LocalNotebookLoader
from databricks.labs.ucx.source_code.files import LocalFileMigrator
from databricks.labs.ucx.source_code.files import LocalFileMigrator, LocalFileLinter
from databricks.labs.ucx.workspace_access.clusters import ClusterAccess

logger = logging.getLogger(__name__)
Expand All @@ -44,10 +44,6 @@ def workspace_client(self) -> WorkspaceClient:
def sql_backend(self) -> SqlBackend:
return StatementExecutionBackend(self.workspace_client, self.config.warehouse_id)

@cached_property
def local_file_migrator(self):
return LocalFileMigrator(self.languages)

@cached_property
def cluster_access(self):
return ClusterAccess(self.installation, self.workspace_client, self.prompts)
Expand Down Expand Up @@ -202,3 +198,16 @@ def account_workspaces(self):
@cached_property
def account_metastores(self):
return AccountMetastores(self.account_client)


class LocalContext(WorkspaceContext):
"""Local context extends Workspace context to provide extra properties
for running local operations."""

@cached_property
def local_file_migrator(self):
return LocalFileMigrator(self.languages)

@cached_property
def local_file_linter(self):
return LocalFileLinter(self.tables_migrator.index(), self.dependency_graph_builder)
60 changes: 60 additions & 0 deletions src/databricks/labs/ucx/source_code/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@

from databricks.sdk.service.workspace import Language

from databricks.labs.ucx.hive_metastore.migration_status import MigrationIndex
from databricks.labs.ucx.source_code.languages import Languages
from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage, NOTEBOOK_HEADER
from databricks.labs.ucx.source_code.notebooks.sources import NotebookLinter
from databricks.labs.ucx.source_code.python_linter import PythonLinter, ASTLinter
from databricks.labs.ucx.source_code.graph import (
DependencyGraph,
Expand All @@ -18,6 +20,7 @@
DependencyLoader,
Dependency,
BaseDependencyResolver,
DependencyGraphBuilder,
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -222,3 +225,60 @@ def pop(self) -> Path:
@property
def paths(self) -> Iterable[Path]:
yield from self._paths


class LocalFileLinter:

def __init__(self, migration_index: MigrationIndex, graph_builder: DependencyGraphBuilder) -> None:
self._migration_index = migration_index
self._graph_builder = graph_builder
self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}

# TODO: Build dependency graph, use ws tools for directory walk
def lint(self, path: Path) -> bool:

if path.is_dir():
for child_path in path.iterdir():
self.lint(child_path)
return True
return self._lint_file(path)

def _lint_file(self, path: Path) -> bool:
if path.suffix not in self._extensions:
return False
language = self._extensions[path.suffix]
jimidle marked this conversation as resolved.
Show resolved Hide resolved
if not language:
return False
advised = False
logger.info(f"Analysing {path}")

# Recreate Language every time to reset the state
languages = Languages(self._migration_index)
# TODO: Change to use tools rather than with
with path.open("r") as f:

# Check dependencies
# TODO: Need a local version of this, pending Eric's PR
self._graph_builder.build_notebook_dependency_graph(path)
for problem in self._graph_builder.problems:
logger.info(f"Found: {problem}")
advised = True

code = f.read()
# Check to see if this is actually a notebook
if self.is_notebook(code):
notebook_linter = NotebookLinter.from_source(self._migration_index, code, language)
for advice in notebook_linter.lint():
logger.info(f"Found: {advice}")
advised = True
return advised

linter = languages.linter(language)
for advice in linter.lint(code):
logger.info(f"Found: {advice}")
advised = True
return advised

@staticmethod
def is_notebook(src: str) -> bool:
return NOTEBOOK_HEADER in src.split('\n', 1)[0]
3 changes: 3 additions & 0 deletions tests/unit/source_code/test_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,6 @@ def test_files_walks_directory():
files.apply(path)
languages.fixer.assert_called_with(Language.PYTHON, 'some-code')
assert languages.fixer.call_count > 1


# TODO: Add tests for the LocalFileLinter class once the NotebookLoader is implemented