Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fetch JupyterHub roles from Keycloak #2447

Merged
merged 12 commits into from
May 14, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def service_for_jhub_apps(name, url):
"external": True,
},
"oauth_no_confirm": True,
"oauth_redirect_uri": "/services/:name/oauth_callback",
}

c.JupyterHub.services.extend(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import inspect
import json
import os
import urllib
import urllib.parse
from functools import reduce

from jupyterhub.traitlets import Callable
from oauthenticator.generic import GenericOAuthenticator
from traitlets import Bool, Unicode, Union


class KeyCloakOAuthenticator(GenericOAuthenticator):
"""
Since `oauthenticator` 16.3 `GenericOAuthenticator` supports group management.
This subclass adds role management on top of it, building on the new `manage_roles`
feature added in JupyterHub 5.0 (https://github.com/jupyterhub/jupyterhub/pull/4748).
"""

# Where to find the user's roles in the user info returned by the identity
# provider: either a key name (dot-separated to reach nested dicts) or a callable
# that receives the user info and returns the roles. The string default is read
# from the OAUTH2_ROLES_KEY environment variable, falling back to "groups".
claim_roles_key = Union(
    [Unicode(os.environ.get("OAUTH2_ROLES_KEY", "groups")), Callable()],
    config=True,
    help="""As `claim_groups_key` but for roles.""",
)

# Base URL that `_fetch_api` prefixes to every endpoint it requests.
realm_api_url = Unicode(
    config=True, help="""The keycloak REST API URL for the realm."""
)

# NOTE(review): presumably overrides a JupyterHub Authenticator trait so that
# managed role assignments are reset when the hub starts — confirm against the
# JupyterHub documentation.
reset_managed_roles_on_startup = Bool(True)

async def update_auth_model(self, auth_model):
    """Attach the user's roles to the auth model.

    Delegates to the parent class first, then reads the user info stored under
    ``auth_model["auth_state"][self.user_auth_state_key]``, extracts the role
    names with `_get_user_roles` and stores them under ``auth_model["roles"]``
    as a list of ``{"name": <role>}`` dicts.

    Args:
        auth_model: The auth model dict produced during authentication.

    Returns:
        The same auth model, with its ``"roles"`` entry populated.
    """
    auth_model = await super().update_auth_model(auth_model)
    user_info = auth_model["auth_state"][self.user_auth_state_key]
    user_roles = self._get_user_roles(user_info)
    auth_model["roles"] = [{"name": role_name} for role_name in user_roles]
    # note: because the roles check is comprehensive, we need to re-add the admin and user roles
    if auth_model["admin"]:
        auth_model["roles"].append({"name": "admin"})
    # `check_allowed` is a coroutine function in the OAuthenticator base classes.
    # An un-awaited coroutine object is always truthy, which would hand the "user"
    # role to everyone, so resolve the result before testing it. The awaitable
    # check keeps this working should the method ever be synchronous.
    allowed = self.check_allowed(auth_model["name"], auth_model)
    if inspect.isawaitable(allowed):
        allowed = await allowed
    if allowed:
        auth_model["roles"].append({"name": "user"})
    return auth_model

async def load_managed_roles(self):
    """Load role definitions and their assignments from the Keycloak REST API.

    Fetches the realm roles and the roles of the "jupyterhub" client, then for
    every role fetches the groups and users it is assigned to.

    Returns:
        A list with one dict per role, holding the keys ``"name"``,
        ``"description"`` (``None`` when Keycloak returns no description),
        ``"groups"`` (group paths) and ``"users"`` (usernames).

    Raises:
        ValueError: If `manage_roles` is not enabled, or if the realm does not
            contain exactly one client whose ``clientId`` is "jupyterhub".
    """
    if not self.manage_roles:
        raise ValueError(
            "Managed roles can only be loaded when `manage_roles` is True"
        )
    token = await self._get_token()

    # Get the clients list to find the "id" of "jupyterhub" client.
    clients_data = await self._fetch_api(endpoint="clients/", token=token)
    jupyterhub_clients = [
        client for client in clients_data if client["clientId"] == "jupyterhub"
    ]
    # Explicit check instead of `assert`, which is stripped when Python runs with -O.
    if len(jupyterhub_clients) != 1:
        raise ValueError(
            'Expected exactly one Keycloak client with clientId "jupyterhub", '
            f"found {len(jupyterhub_clients)}"
        )
    jupyterhub_client_id = jupyterhub_clients[0]["id"]

    # Includes roles like "jupyterhub_admin", "jupyterhub_developer", "dask_gateway_developer"
    client_roles = await self._fetch_api(
        endpoint=f"clients/{jupyterhub_client_id}/roles", token=token
    )
    # Includes roles like "default-roles-nebari", "offline_access", "uma_authorization"
    realm_roles = await self._fetch_api(endpoint="roles", token=token)
    roles = {
        # "description" is optional in the API response, so do not index it directly.
        role["name"]: {"name": role["name"], "description": role.get("description")}
        for role in [*realm_roles, *client_roles]
    }
    # we could use either `name` (e.g. "developer") or `path` ("/developer");
    # since the default claim key returns `path`, it seems preferable.
    group_name_key = "path"
    # Realm roles and client roles expose the same "<role>/groups" and
    # "<role>/users" sub-resources under different prefixes.
    role_sources = (
        (realm_roles, "roles"),
        (client_roles, f"clients/{jupyterhub_client_id}/roles"),
    )
    for source_roles, roles_endpoint in role_sources:
        for source_role in source_roles:
            role_name = source_role["name"]
            role = roles[role_name]
            # Role names are free text; escape them so that spaces, slashes, etc.
            # cannot corrupt the request path.
            quoted_name = urllib.parse.quote(role_name, safe="")
            role_endpoint = f"{roles_endpoint}/{quoted_name}"
            # fetch role assignments to groups
            groups = await self._fetch_api(f"{role_endpoint}/groups", token=token)
            role["groups"] = [group[group_name_key] for group in groups]
            # fetch role assignments to users
            users = await self._fetch_api(f"{role_endpoint}/users", token=token)
            role["users"] = [user["username"] for user in users]

    return list(roles.values())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🤔 this still does not include all the roles that keycloak returns in oauth for users.

Currently it gives us: uma_authorization, offline_access, default-roles-nebari; jupyterhub_admin, dask_gateway_developer, dask_gateway_admin, and jupyterhub_developer, however the oauth response also includes the following (for non-admin user):

manage-account, argo-developer, dask_gateway_developer, grafana_viewer, conda_store_developer, argo-viewer, grafana_developer, manage-account-links, view-profile.

The difference appears to be largely explained by the oauth response including roles for all clients, whereas the logic above only loads the jupyterhub client roles. I think this is ok.


def _get_user_roles(self, user_info):
    """Return the set of role names found in ``user_info``.

    When `claim_roles_key` is callable it is invoked with ``user_info`` and its
    result is converted to a set. Otherwise the key is split on "." and each
    part is looked up in turn through the nested dicts. If the lookup fails
    with a ``TypeError``, an error is logged and an empty set is returned.
    """
    roles_key = self.claim_roles_key
    if callable(roles_key):
        return set(roles_key(user_info))
    try:
        found = user_info
        for part in roles_key.split("."):
            # `dict.get` raises TypeError once `found` is no longer a dict,
            # e.g. after a missing intermediate key yielded None.
            found = dict.get(found, part)
        return set(found)
    except TypeError:
        self.log.error(
            f"The claim_roles_key {self.claim_roles_key} does not exist in the user token"
        )
        return set()

async def _get_token(self) -> str:
    """Obtain an access token using this client's own credentials.

    Sends a form-encoded ``client_credentials`` grant request, carrying
    `client_id` and `client_secret`, to `token_url`.

    Returns:
        The ``access_token`` field of the JSON response.
    """
    body = urllib.parse.urlencode(
        {
            "client_id": self.client_id,
            "client_secret": self.client_secret,
            "grant_type": "client_credentials",
        }
    )
    response = await self.http_client.fetch(
        self.token_url,
        method="POST",
        body=body,
        # Follow the authenticator's certificate-validation setting; without
        # it this request always validates, even when `validate_server_cert`
        # has been disabled in the configuration.
        validate_cert=self.validate_server_cert,
    )
    data = json.loads(response.body)
    return data["access_token"]  # type: ignore[no-any-return]

async def _fetch_api(self, endpoint: str, token: str):
    """GET ``<realm_api_url>/<endpoint>`` and return the decoded JSON body.

    Args:
        endpoint: Path appended to `realm_api_url` after a "/".
        token: Bearer token sent in the ``Authorization`` header.

    Returns:
        The response body parsed as JSON.
    """
    response = await self.http_client.fetch(
        f"{self.realm_api_url}/{endpoint}",
        method="GET",
        headers={"Authorization": f"Bearer {token}"},
        # Follow the authenticator's certificate-validation setting; without
        # it this request always validates, even when `validate_server_cert`
        # has been disabled in the configuration.
        validate_cert=self.validate_server_cert,
    )
    return json.loads(response.body)


c.JupyterHub.authenticator_class = KeyCloakOAuthenticator
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ resource "helm_release" "jupyterhub" {

repository = "https://jupyterhub.github.io/helm-chart/"
chart = "jupyterhub"
version = "3.2.1"
version = "4.0.0-0.dev.git.6586.h0a16e5a0"

values = concat([
file("${path.module}/values.yaml"),
Expand Down Expand Up @@ -130,6 +130,7 @@ resource "helm_release" "jupyterhub" {
"01-theme.py" = file("${path.module}/files/jupyterhub/01-theme.py")
"02-spawner.py" = file("${path.module}/files/jupyterhub/02-spawner.py")
"03-profiles.py" = file("${path.module}/files/jupyterhub/03-profiles.py")
"04-auth.py" = file("${path.module}/files/jupyterhub/04-auth.py")
}

services = {
Expand All @@ -143,25 +144,25 @@ resource "helm_release" "jupyterhub" {
# for simple key value configuration with jupyterhub traitlets
# this hub.config property should be used
config = {
JupyterHub = {
authenticator_class = "generic-oauth"
}
Authenticator = {
enable_auth_state = true
}
GenericOAuthenticator = {
KeyCloakOAuthenticator = {
client_id = module.jupyterhub-openid-client.config.client_id
client_secret = module.jupyterhub-openid-client.config.client_secret
oauth_callback_url = "https://${var.external-url}/hub/oauth_callback"
authorize_url = module.jupyterhub-openid-client.config.authentication_url
token_url = module.jupyterhub-openid-client.config.token_url
userdata_url = module.jupyterhub-openid-client.config.userinfo_url
realm_api_url = module.jupyterhub-openid-client.config.realm_api_url
login_service = "Keycloak"
username_claim = "preferred_username"
claim_groups_key = "groups"
claim_roles_key = "roles"
allowed_groups = ["/analyst", "/developer", "/admin"]
admin_groups = ["/admin"]
manage_groups = true
manage_roles = true
refresh_pre_spawn = true
validate_server_cert = false

Expand Down Expand Up @@ -283,6 +284,10 @@ module "jupyterhub-openid-client" {
var.jupyterhub-logout-redirect-url
]
jupyterlab_profiles_mapper = true
service-accounts-enabled = true
service-account-roles = [
"view-realm", "view-users", "view-clients"
]
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ resource "keycloak_openid_client" "main" {
access_type = "CONFIDENTIAL"
standard_flow_enabled = true

valid_redirect_uris = var.callback-url-paths
valid_redirect_uris = var.callback-url-paths
service_accounts_enabled = var.service-accounts-enabled
}


Expand Down Expand Up @@ -62,6 +63,33 @@ resource "keycloak_openid_user_attribute_protocol_mapper" "jupyterlab_profiles"
aggregate_attributes = true
}

# Realm whose id is used for the service-account role lookups. Taken from
# var.realm_id instead of a hard-coded name so it always matches the realm in
# which the `realm-management` client is looked up.
data "keycloak_realm" "master" {
  realm = var.realm_id
}

# Looks up the `realm-management` client in var.realm_id; its id is used to
# resolve the service-account roles and to assign them.
# NOTE(review): presumably Keycloak's built-in client that owns the realm
# administration roles — confirm.
data "keycloak_openid_client" "realm_management" {
  realm_id = var.realm_id
  client_id = "realm-management"
}

# One lookup per entry of var.service-account-roles, resolving each name to a
# role of the `realm-management` client.
data "keycloak_role" "main-service" {
  for_each = toset(var.service-account-roles)

  realm_id = data.keycloak_realm.master.id
  client_id = data.keycloak_openid_client.realm_management.id
  name = each.key
}

# Grants each role listed in var.service-account-roles to the service account
# user of keycloak_openid_client.main.
resource "keycloak_openid_client_service_account_role" "main" {
  for_each = toset(var.service-account-roles)

  realm_id = var.realm_id
  service_account_user_id = keycloak_openid_client.main.service_account_user_id
  client_id = data.keycloak_openid_client.realm_management.id
  role = data.keycloak_role.main-service[each.key].name
}


resource "keycloak_role" "main" {
for_each = toset(flatten(values(var.role_mapping)))

Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
output "config" {
description = "configuration credentials for connecting to openid client"
value = {
client_id = keycloak_openid_client.main.client_id
client_secret = keycloak_openid_client.main.client_secret
client_id = keycloak_openid_client.main.client_id
client_secret = keycloak_openid_client.main.client_secret
service_account_user_id = keycloak_openid_client.main.service_account_user_id

authentication_url = "https://${var.external-url}/auth/realms/${var.realm_id}/protocol/openid-connect/auth"
token_url = "https://${var.external-url}/auth/realms/${var.realm_id}/protocol/openid-connect/token"
userinfo_url = "https://${var.external-url}/auth/realms/${var.realm_id}/protocol/openid-connect/userinfo"
realm_api_url = "https://${var.external-url}/auth/admin/realms/${var.realm_id}"
callback_urls = var.callback-url-paths
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,19 @@ variable "external-url" {
}


# Opt-in switch; defaults to false so clients get no service account unless
# they ask for one.
variable "service-accounts-enabled" {
  description = "Whether the client should have a service account created"
  type = bool
  default = false
}

# Names of the roles to grant to the client's service account; the empty
# default grants nothing.
variable "service-account-roles" {
  description = "Roles to be granted to the service account. Requires setting service-accounts-enabled to true."
  type = list(string)
  default = []
}


variable "role_mapping" {
description = "Group to role mapping to establish for client"
type = map(list(string))
Expand Down
42 changes: 42 additions & 0 deletions tests/tests_deployment/test_jupyterhub_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import pytest

from tests.tests_deployment import constants
from tests.tests_deployment.utils import get_jupyterhub_session


@pytest.mark.filterwarnings("ignore::urllib3.exceptions.InsecureRequestWarning")
def test_jupyterhub_loads_roles_from_keycloak():
session = get_jupyterhub_session()
xsrf_token = session.cookies.get("_xsrf")
response = session.get(
f"https://{constants.NEBARI_HOSTNAME}/hub/api/users/{constants.KEYCLOAK_USERNAME}",
headers={"X-XSRFToken": xsrf_token},
verify=False,
)
user = response.json()
assert set(user["roles"]) == {
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is passing locally but failing on CI. I suspect this is because the CI is using docker images with older JupyterHub versions - if I am right, merging nebari-dev/nebari-docker-images#140 should make the tests green.

"user",
"manage-account",
"jupyterhub_developer",
"argo-developer",
"dask_gateway_developer",
"grafana_viewer",
"conda_store_developer",
"argo-viewer",
"grafana_developer",
"manage-account-links",
"view-profile",
}


@pytest.mark.filterwarnings("ignore::urllib3.exceptions.InsecureRequestWarning")
def test_jupyterhub_loads_groups_from_keycloak():
    """The hub's user API must report the expected groups for the test user."""
    hub_session = get_jupyterhub_session()
    user_url = (
        f"https://{constants.NEBARI_HOSTNAME}/hub/api/users/{constants.KEYCLOAK_USERNAME}"
    )
    # The XSRF cookie of the session is sent back as the X-XSRFToken header.
    request_headers = {"X-XSRFToken": hub_session.cookies.get("_xsrf")}
    user = hub_session.get(user_url, headers=request_headers, verify=False).json()
    expected_groups = {"/analyst", "/developer", "/users"}
    assert set(user["groups"]) == expected_groups
Loading