Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for migrating external location permissions from interactive cluster mounts #1487

Merged
merged 37 commits into from
May 1, 2024
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
cd98886
blank PR
HariGS-DB Apr 22, 2024
d06bcb2
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 22, 2024
0bb495a
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 25, 2024
a3109a9
code changes for azure location acl
HariGS-DB Apr 25, 2024
c585961
added azure unit test cases
HariGS-DB Apr 25, 2024
f3bfe91
azure int tests
HariGS-DB Apr 25, 2024
c5f94a0
azure int tests
HariGS-DB Apr 25, 2024
3758cdf
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 25, 2024
da787e0
azure int tests
HariGS-DB Apr 25, 2024
91f99a9
azure int tests
HariGS-DB Apr 25, 2024
cbd4d55
refactoring
HariGS-DB Apr 26, 2024
ba72b25
refactoring
HariGS-DB Apr 26, 2024
efdf064
formatting, removing unused argument
HariGS-DB Apr 26, 2024
758d420
formatting, removing unused argument
HariGS-DB Apr 26, 2024
6efe156
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 26, 2024
b1a0afd
pylint fixes
HariGS-DB Apr 26, 2024
6686452
pylint fixes
HariGS-DB Apr 26, 2024
6bd1d61
Update src/databricks/labs/ucx/hive_metastore/grants.py
HariGS-DB Apr 26, 2024
5de41fb
aws cli context, addressing review comments
HariGS-DB Apr 27, 2024
499c7e7
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 27, 2024
933b284
aws int test
HariGS-DB Apr 27, 2024
3e2f8a7
Merge branch 'main' into feature/locationacl
HariGS-DB Apr 30, 2024
19f4d99
improving test coverage
HariGS-DB Apr 30, 2024
ba91587
int test changes
HariGS-DB May 1, 2024
2f79178
Merge branch 'main' into feature/locationacl
HariGS-DB May 1, 2024
8c9e081
Merge branch 'main' into feature/locationacl
HariGS-DB May 1, 2024
2ed0e97
feedback
nkvuong May 1, 2024
07eabb2
Merge branch 'feature/locationacl' of https://github.com/databricksla…
HariGS-DB May 1, 2024
c9d279e
feedback
nkvuong May 1, 2024
c34ff8c
revert
nkvuong May 1, 2024
83bafe5
fmt
nkvuong May 1, 2024
2e85336
fmt
nkvuong May 1, 2024
2b92d20
Merge branch 'feature/locationacl' of https://github.com/databricksla…
HariGS-DB May 1, 2024
1276ec7
int test changes
HariGS-DB May 1, 2024
05abdbc
Merge branch 'main' into feature/locationacl
HariGS-DB May 1, 2024
ff56a6f
int test changes
HariGS-DB May 1, 2024
418316e
init space
HariGS-DB May 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/databricks/labs/ucx/aws/access.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
from databricks.sdk.service.compute import Policy


from databricks.labs.ucx.assessment.aws import (
AWSInstanceProfile,
AWSResources,
Expand All @@ -21,21 +22,23 @@
)
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.hive_metastore import ExternalLocations
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL
from databricks.labs.ucx.hive_metastore.locations import ExternalLocation


class AWSResourcePermissions:
UC_ROLES_FILE_NAMES: typing.ClassVar[str] = "uc_roles_access.csv"
INSTANCE_PROFILES_FILE_NAMES: typing.ClassVar[str] = "aws_instance_profile_info.csv"

def __init__(
def __init__( # pylint: disable=too-many-arguments
self,
installation: Installation,
ws: WorkspaceClient,
backend: SqlBackend,
aws_resources: AWSResources,
external_locations: ExternalLocations,
schema: str,
principal_acl: PrincipalACL,
aws_account_id=None,
kms_key=None,
):
Expand All @@ -48,6 +51,7 @@ def __init__(
self._aws_account_id = aws_account_id
self._kms_key = kms_key
self._filename = self.INSTANCE_PROFILES_FILE_NAMES
self._principal_acl = principal_acl

def create_uc_roles_cli(self, *, single_role=True, role_name="UC_ROLE", policy_name="UC_POLICY"):
# Get the missing paths
Expand Down Expand Up @@ -275,6 +279,7 @@ def create_external_locations(self, location_init="UCX_location"):
external_location_name, path, credential_dict[role_arn], skip_validation=True
)
external_location_num += 1
self._principal_acl.apply_location_acl()

def get_instance_profile(self, instance_profile_name: str) -> AWSInstanceProfile | None:
instance_profile_arn = self._aws_resources.get_instance_profile(instance_profile_name)
Expand Down
6 changes: 5 additions & 1 deletion src/databricks/labs/ucx/azure/locations.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@

from databricks.sdk import WorkspaceClient
from databricks.sdk.errors.platform import InvalidParameterValue, PermissionDenied

from databricks.labs.ucx.azure.access import AzureResourcePermissions
from databricks.labs.ucx.azure.resources import AzureResources
from databricks.labs.ucx.hive_metastore import ExternalLocations
from databricks.labs.ucx.hive_metastore.grants import PrincipalACL


logger = logging.getLogger(__name__)

Expand All @@ -18,11 +19,13 @@ def __init__(
hms_locations: ExternalLocations,
resource_permissions: AzureResourcePermissions,
azurerm: AzureResources,
principal_acl: PrincipalACL,
):
self._ws = ws
self._hms_locations = hms_locations
self._resource_permissions = resource_permissions
self._azurerm = azurerm
self._principal_acl = principal_acl

def _app_id_credential_name_mapping(self) -> tuple[dict[str, str], dict[str, str]]:
# list all storage credentials.
Expand Down Expand Up @@ -171,6 +174,7 @@ def run(self):
migrated_loc_urls.append(migrated_loc_url)

leftover_loc_urls = [url for url in missing_loc_urls if url not in migrated_loc_urls]
self._principal_acl.apply_location_acl()
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
if leftover_loc_urls:
logger.info(
"External locations below are not created in UC. You may check following cases and rerun this command:"
Expand Down
2 changes: 2 additions & 0 deletions src/databricks/labs/ucx/contexts/cli_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def azure_external_locations_migration(self):
self.external_locations,
self.azure_resource_permissions,
self.azure_resources,
self.principal_acl,
)

@cached_property
Expand Down Expand Up @@ -154,6 +155,7 @@ def aws_resource_permissions(self):
self.aws_resources,
self.external_locations,
self.inventory_database,
self.principal_acl,
self.named_parameters.get("aws_account_id"),
self.named_parameters.get("kms_key"),
)
Expand Down
63 changes: 58 additions & 5 deletions src/databricks/labs/ucx/hive_metastore/grants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import typing
from collections import defaultdict
from collections.abc import Iterable
from dataclasses import dataclass
Expand All @@ -9,15 +10,21 @@
from databricks.labs.lsql.backends import SqlBackend
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import ResourceDoesNotExist, NotFound
from databricks.sdk.service.catalog import ExternalLocationInfo, SchemaInfo, TableInfo
from databricks.sdk.service.catalog import (
ExternalLocationInfo,
SchemaInfo,
TableInfo,
Privilege,
PermissionsChange,
SecurableType,
)
from databricks.sdk.service.compute import ClusterSource, DataSecurityMode

from databricks.labs.ucx.assessment.aws import AWSRoleAction
from databricks.labs.ucx.assessment.azure import (
AzureServicePrincipalCrawler,
AzureServicePrincipalInfo,
)
from databricks.labs.ucx.aws.access import AWSResourcePermissions
from databricks.labs.ucx.azure.access import (
AzureResourcePermissions,
StoragePermissionMapping,
Expand All @@ -41,6 +48,12 @@
locations: dict[str, str]


@dataclass
class LocationACL:
location_name: str
principal: str


@dataclass(frozen=True)
class Grant:
principal: str
Expand Down Expand Up @@ -341,6 +354,8 @@


class AwsACL:
INSTANCE_PROFILES_FILE_NAMES: typing.ClassVar[str] = "aws_instance_profile_info.csv"
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved

def __init__(
self,
ws: WorkspaceClient,
Expand Down Expand Up @@ -390,9 +405,7 @@
logger.error(msg)
raise ResourceDoesNotExist(msg) from None

permission_mappings = self._installation.load(
list[AWSRoleAction], filename=AWSResourcePermissions.INSTANCE_PROFILES_FILE_NAMES
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
)
permission_mappings = self._installation.load(list[AWSRoleAction], filename=self.INSTANCE_PROFILES_FILE_NAMES)
if len(permission_mappings) == 0:
# if permission mapping is empty, raise an error to run principal_prefix cmd
msg = (
Expand Down Expand Up @@ -612,3 +625,43 @@
if acl.service_principal_name is not None:
principal_list.append(acl.service_principal_name)
return principal_list

def apply_location_acl(
self,
):
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
# Check the interactive cluster and the principals mapped to it
# identifies the spn or instance profile configured for the interactive cluster
# identifies any location the spn/instance profile have access to (read or write)
# applies create_external_table, create_external_volume and read_files permission for all location
# to the principal
logger.info(
"Applying permission for external location (CREATE EXTERNAL TABLE, "
"CREATE EXTERNAL VOLUME and READ_FILES for existing eligible interactive cluster users"
)
# get the eligible location mapped for each interactive cluster
permissions = [
Privilege.CREATE_EXTERNAL_TABLE,
Privilege.CREATE_EXTERNAL_VOLUME,
Privilege.READ_FILES,
]
for cluster_id, locations in self._cluster_locations.items():
# get interactive cluster users
principals = self._get_cluster_principal_mapping(cluster_id)
if len(principals) == 0:
continue
for location_url, _ in locations.items():
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
# get the location name for the given url
location_name = self._get_location_name(location_url)
for principal in principals:
self._ws.grants.update(
SecurableType.EXTERNAL_LOCATION,
location_name,
changes=[PermissionsChange(add=permissions, principal=principal)],
)
logger.info("Applied all the permission on external location")

def _get_location_name(self, location_url: str):
for location in self._ws.external_locations.list():
if location.url == location_url:
return location.name
return None

Check warning on line 667 in src/databricks/labs/ucx/hive_metastore/grants.py

View check run for this annotation

Codecov / codecov/patch

src/databricks/labs/ucx/hive_metastore/grants.py#L667

Added line #L667 was not covered by tests
12 changes: 12 additions & 0 deletions tests/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
def get_azure_spark_conf():
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
return {
"spark.databricks.cluster.profile": "singleNode",
"spark.master": "local[*]",
"fs.azure.account.auth.type.labsazurethings.dfs.core.windows.net": "OAuth",
"fs.azure.account.oauth.provider.type.labsazurethings.dfs.core.windows.net": "org.apache.hadoop.fs"
".azurebfs.oauth2.ClientCredsTokenProvider",
"fs.azure.account.oauth2.client.id.labsazurethings.dfs.core.windows.net": "dummy_application_id",
"fs.azure.account.oauth2.client.secret.labsazurethings.dfs.core.windows.net": "dummy",
"fs.azure.account.oauth2.client.endpoint.labsazurethings.dfs.core.windows.net": "https://login"
".microsoftonline.com/directory_12345/oauth2/token",
}
10 changes: 8 additions & 2 deletions tests/integration/aws/test_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def test_get_uc_compatible_roles(ws, env_or_skip, make_random):
assert compat_roles


def test_create_external_location(ws, env_or_skip, make_random, inventory_schema, sql_backend):
def test_create_external_location(ws, env_or_skip, make_random, inventory_schema, sql_backend, runtime_ctx):
profile = env_or_skip("AWS_DEFAULT_PROFILE")
rand = make_random(5).lower()
sql_backend.save_table(
Expand All @@ -49,6 +49,8 @@ def test_create_external_location(ws, env_or_skip, make_random, inventory_schema
aws,
ExternalLocations(ws, sql_backend, inventory_schema),
inventory_schema,
runtime_ctx.aws_acl,
runtime_ctx.principal_acl,
account_id,
)
aws_permissions.create_external_locations(location_init=f"UCX_LOCATION_{rand}")
Expand All @@ -62,7 +64,9 @@ def test_create_external_location(ws, env_or_skip, make_random, inventory_schema
assert external_location[0].credential_name == f"ucx_{rand}"


def test_create_uber_instance_profile(ws, env_or_skip, make_random, inventory_schema, sql_backend, make_cluster_policy):
def test_create_uber_instance_profile(
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
ws, env_or_skip, make_random, inventory_schema, sql_backend, make_cluster_policy, runtime_ctx
):
profile = env_or_skip("AWS_DEFAULT_PROFILE")
aws = AWSResources(profile)
account_id = aws.validate_connection().get("Account")
Expand All @@ -81,6 +85,8 @@ def test_create_uber_instance_profile(ws, env_or_skip, make_random, inventory_sc
aws,
ExternalLocations(ws, sql_backend, inventory_schema),
inventory_schema,
runtime_ctx.aws_acl,
runtime_ctx.principal_acl,
account_id,
)
aws_permissions.create_uber_principal(
Expand Down
13 changes: 11 additions & 2 deletions tests/integration/aws/test_credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


@pytest.fixture
def run_migration(ws, sql_backend, env_or_skip):
def run_migration(ws, sql_backend, env_or_skip, runtime_ctx):
HariGS-DB marked this conversation as resolved.
Show resolved Hide resolved
def inner(credentials: set[str], read_only=False) -> list[CredentialValidationResult]:
installation = MockInstallation(
{
Expand All @@ -30,7 +30,16 @@ def inner(credentials: set[str], read_only=False) -> list[CredentialValidationRe

aws = AWSResources(env_or_skip("AWS_DEFAULT_PROFILE"))
location = ExternalLocations(ws, sql_backend, "inventory_schema")
resource_permissions = AWSResourcePermissions(installation, ws, sql_backend, aws, location, "inventory_schema")
resource_permissions = AWSResourcePermissions(
installation,
ws,
sql_backend,
aws,
location,
"inventory_schema",
runtime_ctx.aws_acl,
runtime_ctx.principal_acl,
)

instance_profile_migration = IamRoleMigration(installation, ws, resource_permissions, CredentialManager(ws))

Expand Down
Loading
Loading