Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added databricks labs ucx create-uber-principal command to create Azure Service Principal for migration #976

Merged
merged 43 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
1a11849
labs update initia commit
HariGS-DB Feb 21, 2024
a48f57e
added logic for creating master spn and giving permission to storage
HariGS-DB Feb 22, 2024
bf95e7e
AzureResource test cases
HariGS-DB Feb 23, 2024
f2ff366
test fixes
HariGS-DB Feb 23, 2024
923f3e6
merge
HariGS-DB Feb 23, 2024
e49f028
merge fixes
HariGS-DB Feb 23, 2024
ced6c12
add resource test cases
HariGS-DB Feb 24, 2024
7110bf9
add access test cases
HariGS-DB Feb 24, 2024
6494c1f
add access test cases
HariGS-DB Feb 24, 2024
651f98b
add access test cases
HariGS-DB Feb 24, 2024
9abc6ba
fixes to int tests
HariGS-DB Feb 25, 2024
ee36d2e
fixes to int tests
HariGS-DB Feb 25, 2024
e208074
fmting
HariGS-DB Feb 25, 2024
572d4d1
cmting policy save
HariGS-DB Feb 25, 2024
f797a57
api change
HariGS-DB Feb 26, 2024
dca069e
mergring changes
HariGS-DB Mar 2, 2024
c232d14
changes to api call
HariGS-DB Mar 2, 2024
c3d71a1
function call change
HariGS-DB Mar 2, 2024
24033f0
merging
HariGS-DB Mar 2, 2024
eb6d8b2
commentin clusterpolicy backup
HariGS-DB Mar 2, 2024
7f4f35a
logging merge
HariGS-DB Mar 2, 2024
f143199
improving unit test coverage
HariGS-DB Mar 3, 2024
49d8731
improving int test coverage
HariGS-DB Mar 3, 2024
6d632e7
merging changes from main
HariGS-DB Mar 3, 2024
a82453f
cluster policy not found error
HariGS-DB Mar 3, 2024
4c1cecc
cli test fix
HariGS-DB Mar 3, 2024
7a46f88
skipping test
HariGS-DB Mar 3, 2024
634f257
skipping test
HariGS-DB Mar 3, 2024
d7831d1
fmting
HariGS-DB Mar 3, 2024
9afc09f
removing mocker.patch and adding dependency for blueprint 3.0
HariGS-DB Mar 4, 2024
109b521
Merge branch 'main' into feature/azureuber
HariGS-DB Mar 4, 2024
384ae40
added prompts, clusterpolicy save, resourceId from debug env
HariGS-DB Mar 4, 2024
8b2b9a5
changed secret code to secret scope
HariGS-DB Mar 4, 2024
a595ebf
updated propertymock with method
HariGS-DB Mar 4, 2024
5fde054
Merge branch 'main' into feature/azureuber
HariGS-DB Mar 4, 2024
5576994
commenting int test
HariGS-DB Mar 5, 2024
dfcd1d9
new blueprint reference
HariGS-DB Mar 5, 2024
ec81f9e
fmting
HariGS-DB Mar 5, 2024
832e5bb
Merge branch 'main' into feature/azureuber
HariGS-DB Mar 5, 2024
ec5d666
Merge branch 'main' into feature/azureuber
HariGS-DB Mar 5, 2024
826b119
removing policy.py
HariGS-DB Mar 5, 2024
b74e204
adding removing scope to uninstall and delete spn function
HariGS-DB Mar 5, 2024
fe7c7d0
adding removing scope to uninstall and delete spn function
HariGS-DB Mar 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions labs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ commands:
- name: aws-profile
description: AWS Profile to use for authentication

- name: create-uber-principal
description: For azure cloud, creates a service principal and gives STORAGE BLOB READER access on all the storage accounts
used by tables in the workspace, and stores the SPN info in the UCX cluster policy.
flags:
- name: subscription-id
description: Subscription to scan storage accounts in

- name: validate-groups-membership
description: Validate groups to check if the groups at account level and workspace level have different memberships
table_template: |-
Expand Down
110 changes: 108 additions & 2 deletions src/databricks/labs/ucx/azure/access.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
import json
import uuid
from dataclasses import dataclass

from databricks.labs.blueprint.installation import Installation
from databricks.labs.blueprint.tui import Prompts
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound
from databricks.sdk.service.catalog import Privilege

from databricks.labs.ucx.assessment.crawlers import logger
from databricks.labs.ucx.azure.resources import AzureResource, AzureResources
from databricks.labs.ucx.azure.resources import (
AzureAPIClient,
AzureResource,
AzureResources,
PrincipalSecret,
)
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
from databricks.labs.ucx.hive_metastore.locations import ExternalLocations
Expand Down Expand Up @@ -46,7 +55,12 @@ def for_cli(cls, ws: WorkspaceClient, product='ucx', include_subscriptions=None)
installation = Installation.current(ws, product)
config = installation.load(WorkspaceConfig)
sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
azurerm = AzureResources(ws, include_subscriptions=include_subscriptions)
azure_mgmt_client = AzureAPIClient(
ws.config.arm_environment.resource_manager_endpoint,
ws.config.arm_environment.service_management_endpoint,
)
graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com")
azurerm = AzureResources(azure_mgmt_client, graph_client, include_subscriptions)
locations = ExternalLocations(ws, sql_backend, config.inventory_database)
return cls(installation, ws, azurerm, locations)

Expand Down Expand Up @@ -91,6 +105,98 @@ def save_spn_permissions(self) -> str | None:
return None
return self._installation.save(storage_account_infos, filename=self._filename)

def _update_cluster_policy_definition(
    self, policy_definition: str, storage_accounts: list[AzureResource], uber_principal: PrincipalSecret
) -> str:
    """Return the policy-definition JSON extended with OAuth Spark confs for each storage account.

    For every storage account, four fixed Spark-conf entries (client id, token
    provider type, OAuth endpoint, auth type) are added, plus a fifth with the
    client secret when one is available, so clusters created from the policy can
    reach the account's ABFSS endpoint as the uber service principal.
    """
    definition = json.loads(policy_definition)
    # The OAuth token endpoint is tenant-wide, so resolve it once, outside the loop.
    oauth_endpoint = f"https://login.microsoftonline.com/{self._azurerm.tenant_id()}/oauth2/token"
    for account in storage_accounts:
        host = f"{account.storage_account}.dfs.core.windows.net"
        definition[f"spark_conf.fs.azure.account.oauth2.client.id.{host}"] = self._policy_config(
            uber_principal.client.client_id
        )
        definition[f"spark_conf.fs.azure.account.oauth.provider.type.{host}"] = self._policy_config(
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider"
        )
        definition[f"spark_conf.fs.azure.account.oauth2.client.endpoint.{host}"] = self._policy_config(oauth_endpoint)
        definition[f"spark_conf.fs.azure.account.auth.type.{host}"] = self._policy_config("OAuth")
        if uber_principal.secret is not None:
            definition[f"spark_conf.fs.azure.account.oauth2.client.secret.{host}"] = self._policy_config(
                uber_principal.secret
            )
    return json.dumps(definition)

@staticmethod
def _policy_config(value: str):
    """Wrap *value* as a non-overridable ("fixed") cluster-policy setting."""
    # Key order matters: the caller serializes the policy with json.dumps,
    # so keep "type" before "value" exactly as before.
    return dict(type="fixed", value=value)

def _update_cluster_policy_with_spn(
    self, policy_id: str, storage_accounts: list[AzureResource], uber_principal: PrincipalSecret
):
    """Back up the UCX cluster policy, then rewrite it with the uber principal's OAuth confs.

    The current policy is saved to ``policy-backup.json`` in the installation
    folder before being edited, so the change can be rolled back manually.

    Raises:
        NotFound: when no cluster policy with *policy_id* exists; the message
            tells the user to re-run the UCX installation, which creates it.
    """
    try:
        # Keep the try minimal: only the lookup should be translated into the
        # "run UCX installation" hint. Previously save/edit failures inside the
        # same try were mislabeled as a missing policy.
        cluster_policy = self._ws.cluster_policies.get(policy_id)
    except NotFound:
        msg = f"cluster policy {policy_id} not found, please run UCX installation to create UCX cluster policy"
        raise NotFound(msg) from None
    self._installation.save(cluster_policy, filename="policy-backup.json")
    policy_definition = ""
    if cluster_policy.definition is not None:
        policy_definition = self._update_cluster_policy_definition(
            cluster_policy.definition, storage_accounts, uber_principal
        )
    # The edit API requires the policy name; skip the edit if the API returned none.
    if cluster_policy.name is not None:
        self._ws.cluster_policies.edit(policy_id, cluster_policy.name, definition=policy_definition)

def create_uber_principal(self, prompts: Prompts):
    """Create an Azure "uber" service principal for the table migration.

    Prompts for a principal name, creates the SPN, grants it Storage Blob Data
    Reader on every storage account used by external tables, and injects the
    SPN's OAuth settings into the UCX cluster policy.

    Args:
        prompts: interactive prompt helper used to ask for the SPN display name.

    Raises:
        ValueError: when the workspace config has no UCX cluster-policy id yet.
    """
    config = self._installation.load(WorkspaceConfig)
    display_name = f"unity-catalog-migration-{config.inventory_database}-{self._ws.get_workspace_id()}"
    uber_principal_name = prompts.question(
        "Enter a name for the uber service principal to be created", default=display_name
    )
    policy_id = config.policy_id
    if policy_id is None:
        msg = "UCX cluster policy not found in config. Please run latest UCX installation to set cluster policy"
        logger.error(msg)
        raise ValueError(msg) from None
    # Idempotence guard: the SPN id is persisted below, so a rerun stops here.
    if config.uber_spn_id is not None:
        logger.error("Uber service principal already created for this workspace.")
        return
    used_storage_accounts = self._get_storage_accounts()
    if len(used_storage_accounts) == 0:
        logger.warning(
            "There are no external table present with azure storage account. "
            "Please check if assessment job is run"
        )
        return
    # Only grant access on storage accounts actually referenced by external tables.
    storage_account_info = [
        storage for storage in self._azurerm.storage_accounts() if storage.storage_account in used_storage_accounts
    ]
    logger.info("Creating service principal")
    uber_principal = self._azurerm.create_service_principal(uber_principal_name)
    # Persist the SPN id immediately (config was loaded above and is still
    # current — the previous redundant reload here has been dropped) so a
    # partially-failed run is detected as "already created" next time.
    config.uber_spn_id = uber_principal.client.client_id
    self._installation.save(config)
    logger.info(
        f"Created service principal of client_id {config.uber_spn_id}. Applying permission on storage accounts"
    )
    for storage in storage_account_info:
        # Azure role-assignment names must be unique GUIDs within their scope.
        role_name = str(uuid.uuid4())
        self._azurerm.apply_storage_permission(
            uber_principal.client.object_id, storage, "STORAGE_BLOB_DATA_READER", role_name
        )
        logger.debug(
            f"Storage Data Blob Reader permission applied for spn {config.uber_spn_id} "
            f"to storage account {storage.storage_account}"
        )
    self._update_cluster_policy_with_spn(policy_id, storage_account_info, uber_principal)

    logger.info(f"Update UCX cluster policy {policy_id} with spn connection details for storage accounts")

def load(self):
    """Load the saved storage-account permission mappings from the installation folder."""
    mappings = self._installation.load(list[StoragePermissionMapping], filename=self._filename)
    return mappings

Expand Down
9 changes: 7 additions & 2 deletions src/databricks/labs/ucx/azure/credentials.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
AzureResourcePermissions,
StoragePermissionMapping,
)
from databricks.labs.ucx.azure.resources import AzureResources
from databricks.labs.ucx.azure.resources import AzureAPIClient, AzureResources
from databricks.labs.ucx.config import WorkspaceConfig
from databricks.labs.ucx.framework.crawlers import StatementExecutionBackend
from databricks.labs.ucx.hive_metastore.locations import ExternalLocations
Expand Down Expand Up @@ -171,7 +171,12 @@ def for_cli(cls, ws: WorkspaceClient, installation: Installation, prompts: Promp

config = installation.load(WorkspaceConfig)
sql_backend = StatementExecutionBackend(ws, config.warehouse_id)
azurerm = AzureResources(ws)
azure_mgmt_client = AzureAPIClient(
ws.config.arm_environment.resource_manager_endpoint,
ws.config.arm_environment.service_management_endpoint,
)
graph_client = AzureAPIClient("https://graph.microsoft.com", "https://graph.microsoft.com")
azurerm = AzureResources(azure_mgmt_client, graph_client)
locations = ExternalLocations(ws, sql_backend, config.inventory_database)

resource_permissions = AzureResourcePermissions(installation, ws, azurerm, locations)
Expand Down
Loading
Loading