-
Notifications
You must be signed in to change notification settings - Fork 0
/
__init__.py
241 lines (206 loc) · 10.2 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
import inspect
import sys
import time
from nebari.schema import Base, ProviderEnum
from _nebari.stages.base import NebariTerraformStage
from nebari.hookspecs import NebariStage, hookimpl
from pathlib import Path
from typing import Any, Dict, List, Optional
from pydantic import field_validator
# Default number of Keycloak connection attempts (default for ``num_attempts``
# in MlflowStage._attempt_keycloak_connection).
NUM_ATTEMPTS = 10
# Default value handed to time.sleep() between Keycloak connection attempts,
# i.e. a delay in seconds (default for ``timeout``).
TIMEOUT = 10
# Keycloak client that the stage looks up during check() and passes to
# Terraform as ``client_id`` on AWS.
CLIENT_NAME = "mlflow"
class MlflowConfigAWS(Base):
    """AWS-specific settings for the MLflow stage."""

    # S3 encryption toggle; enabled unless the user overrides it.
    enable_s3_encryption: Optional[bool] = True
class MlflowConfigAzure(Base):
    """Azure-specific settings for the MLflow stage (no options defined yet)."""
class MlflowProvidersInputSchema(Base):
    """Schema of the ``mlflow`` section of the Nebari configuration."""

    # Switch for the MLflow deployment; on by default.
    enabled: bool = True

    # Provider-specific configuration; each section is optional.
    aws: Optional[MlflowConfigAWS] = None
    azure: Optional[MlflowConfigAzure] = None
class InputSchema(Base):
    """Input schema of the stage: adds the ``mlflow`` section to the config."""

    mlflow: MlflowProvidersInputSchema
class MlflowStage(NebariTerraformStage):
    """Terraform-backed Nebari stage that deploys MLflow.

    Only the AWS and Azure providers are handled; for any other provider
    :meth:`check` and :meth:`input_vars` raise ``NotImplementedError``.
    """

    name = "mlflow"
    priority = 102
    wait = True  # wait for install to complete on nebari deploy
    input_schema = InputSchema

    # Stage-output keys read by this stage.
    _INFRASTRUCTURE_STAGE = "stages/02-infrastructure"
    _INGRESS_STAGE = "stages/04-kubernetes-ingress"
    _KEYCLOAK_STAGE = "stages/05-kubernetes-keycloak"
    _KEYCLOAK_CONFIG_STAGE = "stages/06-kubernetes-keycloak-configuration"
    _SERVICES_STAGE = "stages/07-kubernetes-services"

    # TODO: Module requires EKS cluster is configured for IRSA. Once Nebari version with IRSA is released, should update
    # this error message and also minimum Nebari version in pyproject.toml
    _OIDC_MISSING_MESSAGE = (
        "Prerequisite stage output(s) not found in stages/02-infrastructure: "
        "cluster_oidc_issuer_url. Please ensure Nebari version is at least XX."
    )

    @property
    def template_directory(self):
        """Path of the Terraform template for the configured provider.

        Resolves to ``<directory of this class's source file>/template/<provider value>``.
        """
        return (
            Path(inspect.getfile(self.__class__)).parent
            / "template"
            / self.config.provider.value
        )

    def _cluster_oidc_issuer_url(self, stage_outputs: Dict[str, Dict[str, Any]]):
        """Return the cluster OIDC issuer URL from the infrastructure stage.

        Raises:
            KeyError: if the infrastructure stage did not publish the output.
        """
        return stage_outputs[self._INFRASTRUCTURE_STAGE]["cluster_oidc_issuer_url"]["value"]

    def _attempt_keycloak_connection(
        self,
        keycloak_url,
        username,
        password,
        master_realm_name,
        client_id,
        client_realm_name,
        verify=False,
        num_attempts=NUM_ATTEMPTS,
        timeout=TIMEOUT,
    ):
        """Poll Keycloak until the ``CLIENT_NAME`` client is found.

        Args:
            keycloak_url: Base URL of the Keycloak server.
            username: Admin user name.
            password: Admin password.
            master_realm_name: Realm the admin user authenticates against.
            client_id: Client id used for the admin login.
            client_realm_name: Realm in which the client is looked up.
            verify: Passed to ``KeycloakAdmin`` as ``verify``.
                NOTE(review): the default disables certificate verification;
                kept for backward compatibility.
            num_attempts: Maximum number of attempts.
            timeout: Seconds to sleep between attempts.

        Returns:
            True as soon as an attempt finds a client whose ``name`` equals
            ``CLIENT_NAME``; False once every attempt has failed.
        """
        # Imported lazily so the module can be imported without python-keycloak.
        from keycloak import KeycloakAdmin
        from keycloak.exceptions import KeycloakError

        for attempt in range(1, num_attempts + 1):
            try:
                realm_admin = KeycloakAdmin(
                    keycloak_url,
                    username=username,
                    password=password,
                    user_realm_name=master_realm_name,
                    realm_name=client_realm_name,
                    client_id=client_id,
                    verify=verify,
                )
                client_guid = realm_admin.get_client_id(CLIENT_NAME)  # lookup client guid
                # python-keycloak is expected to return None for an unknown
                # client; skip the detail query then instead of requesting
                # a bogus id and reporting it as a connection failure.
                existing_client = (
                    realm_admin.get_client(client_guid)  # query client info
                    if client_guid is not None
                    else None
                )
            except KeycloakError as e:
                print(f"Attempt {attempt} failed connecting to keycloak {client_realm_name} realm -- {e}")
            else:
                # .get() so a client record without a "name" is treated as
                # "not there yet" rather than aborting with KeyError.
                if existing_client is not None and existing_client.get("name") == CLIENT_NAME:
                    print(f"Attempt {attempt} succeeded connecting to keycloak and nebari client={CLIENT_NAME} exists")
                    return True
                print(
                    f"Attempt {attempt} succeeded connecting to keycloak but nebari client={CLIENT_NAME} did not exist"
                )
            # No point in waiting once the last attempt has been made.
            if attempt < num_attempts:
                time.sleep(timeout)
        return False

    def check(self, stage_outputs: Dict[str, Dict[str, Any]], disable_prompt=False) -> bool:
        """Verify that the prerequisites of this stage are satisfied.

        Both providers require the ``cluster_oidc_issuer_url`` output of the
        infrastructure stage and the base config values. On AWS the Keycloak
        ``CLIENT_NAME`` client must additionally be reachable.

        Args:
            stage_outputs: Outputs of the previously deployed stages.
            disable_prompt: Unused by this stage.

        Returns:
            True when every prerequisite is met, False when a prerequisite
            output or config value is missing.

        Raises:
            NotImplementedError: for providers other than AWS and Azure.
            SystemExit: (AWS) when Keycloak cannot be reached or the client
                does not exist after all attempts.
        """
        provider = self.config.provider
        if provider not in (ProviderEnum.aws, ProviderEnum.azure):
            raise NotImplementedError(f"Provider {self.config.provider} not implemented")

        try:
            self._cluster_oidc_issuer_url(stage_outputs)
        except KeyError:
            print(f"\n{self._OIDC_MISSING_MESSAGE}")
            return False

        try:
            _ = self.config.escaped_project_name
            _ = self.config.provider
        except (AttributeError, KeyError):
            # Attribute access on the config object raises AttributeError;
            # KeyError is still accepted for mapping-like configs.
            print("\nBase config values not found: escaped_project_name, provider")
            return False

        if provider == ProviderEnum.aws:
            try:
                keycloak_config = self.get_keycloak_config(stage_outputs)
            except KeyError as err:
                print(f"\nPrerequisite stage output(s) not found for keycloak: {err}")
                return False

            if not self._attempt_keycloak_connection(
                keycloak_url=keycloak_config["keycloak_url"],
                username=keycloak_config["username"],
                password=keycloak_config["password"],
                master_realm_name=keycloak_config["master_realm_id"],
                client_id=keycloak_config["master_client_id"],
                client_realm_name=keycloak_config["realm_id"],
                verify=False,
            ):
                print(
                    f"ERROR: unable to connect to keycloak master realm and ensure that nebari client={CLIENT_NAME} exists"
                )
                sys.exit(1)
            print(f"Keycloak successfully configured with {CLIENT_NAME} client")

        return True

    def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]):
        """Build the Terraform input variables for the configured provider.

        Args:
            stage_outputs: Outputs of the previously deployed stages.

        Returns:
            Dict of Terraform variables; the key set differs between the AWS
            and Azure templates.

        Raises:
            Exception: (AWS) when a prerequisite stage output is missing.
            NotImplementedError: for providers other than AWS and Azure.
        """
        provider = self.config.provider
        if provider == ProviderEnum.aws:
            return self._aws_input_vars(stage_outputs)
        if provider == ProviderEnum.azure:
            return self._azure_input_vars(stage_outputs)
        raise NotImplementedError(f"Provider {self.config.provider} not implemented")

    def _aws_input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
        """Assemble the Terraform variables consumed by the AWS template."""
        keycloak_config = self.get_keycloak_config(stage_outputs)

        try:
            cluster_oidc_issuer_url = self._cluster_oidc_issuer_url(stage_outputs)
        except KeyError as err:
            raise Exception(self._OIDC_MISSING_MESSAGE) from err
        try:
            domain = stage_outputs[self._INGRESS_STAGE]["domain"]
        except KeyError as err:
            raise Exception(
                "Prerequisite stage output(s) not found: stages/02-infrastructure, stages/04-kubernetes-ingress"
            ) from err

        mlflow_cfg = self.config.mlflow
        # The schema declared in this module only defines ``enabled``, ``aws``
        # and ``azure`` on the mlflow section, so the optional settings below
        # are read defensively.
        # TODO confirm the fallback defaults or add the fields to the schema.
        chart_name = getattr(mlflow_cfg, "name", self.name)
        overrides = getattr(mlflow_cfg, "values", {})
        requested_ns = getattr(mlflow_cfg, "namespace", None)

        # ``enable_s3_encryption`` is declared on the ``aws`` sub-section;
        # when that section is omitted, fall back to its declared default.
        aws_cfg = getattr(mlflow_cfg, "aws", None)
        enable_s3_encryption = True if aws_cfg is None else aws_cfg.enable_s3_encryption

        # A dedicated namespace is created only when one is requested and it
        # differs from the Nebari namespace.
        create_ns = bool(requested_ns) and requested_ns != self.config.namespace
        chart_ns = requested_ns if create_ns else self.config.namespace

        return {
            "chart_name": chart_name,
            "project_name": self.config.escaped_project_name,
            "region": self.config.amazon_web_services.region,
            "realm_id": keycloak_config["realm_id"],
            "client_id": CLIENT_NAME,
            "base_url": f"https://{keycloak_config['domain']}/mlflow",
            "external_url": keycloak_config["keycloak_url"],
            "valid_redirect_uris": [f"https://{keycloak_config['domain']}/mlflow/_oauth"],
            "signing_key_ref": {
                "name": "forwardauth-deployment",
                "kind": "Deployment",
                "namespace": self.config.namespace,
            },
            "create_namespace": create_ns,
            "enable_s3_encryption": enable_s3_encryption,
            "namespace": chart_ns,
            "ingress_host": domain,
            "cluster_oidc_issuer_url": cluster_oidc_issuer_url,
            "overrides": overrides,
        }

    def _azure_input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
        """Assemble the Terraform variables consumed by the Azure template."""
        infrastructure = stage_outputs[self._INFRASTRUCTURE_STAGE]
        cluster_oidc_issuer_url = infrastructure["cluster_oidc_issuer_url"]["value"]
        external_url = stage_outputs[self._INGRESS_STAGE]["domain"]
        resource_group_name = infrastructure["resource_group_name"]["value"]
        services = stage_outputs[self._SERVICES_STAGE]
        forwardauth_service_name = services["forward-auth-service"]["value"]["name"]
        forwardauth_middleware_name = services["forward-auth-middleware"]["value"]["name"]

        project_name = self.config.project_name
        return {
            "enabled": self.config.mlflow.enabled,
            "namespace": self.config.namespace,
            "external_url": external_url,
            "helm-release-name": project_name + "-mlflow",
            "forwardauth-service-name": forwardauth_service_name,
            "forwardauth-middleware-name": forwardauth_middleware_name,
            "cluster_oidc_issuer_url": cluster_oidc_issuer_url,
            "storage_resource_group_name": resource_group_name,
            "region": self.config.azure.region,
            # NOTE(review): up to 15 + 5 characters plus the postfix; Azure
            # storage account names are limited to 24 lowercase alphanumeric
            # characters -- confirm the postfix length and project-name charset.
            "storage_account_name": project_name[:15] + "mlfsa" + self.config.azure.storage_account_postfix,
        }

    def get_keycloak_config(self, stage_outputs: Dict[str, Dict[str, Any]]):
        """Collect the Keycloak connection settings from earlier stage outputs.

        Args:
            stage_outputs: Outputs of the previously deployed stages.

        Returns:
            Dict with the keys ``domain``, ``keycloak_url`` (credentials URL
            with ``/auth/`` appended), ``username``, ``password``,
            ``master_realm_id``, ``master_client_id`` and ``realm_id``.

        Raises:
            KeyError: if a required stage output is missing.
        """
        domain = stage_outputs[self._INGRESS_STAGE]["domain"]
        credentials = stage_outputs[self._KEYCLOAK_STAGE]["keycloak_credentials"]["value"]
        return {
            "domain": domain,
            "keycloak_url": f"{credentials['url']}/auth/",
            "username": credentials["username"],
            "password": credentials["password"],
            "master_realm_id": credentials["realm"],
            "master_client_id": credentials["client_id"],
            "realm_id": stage_outputs[self._KEYCLOAK_CONFIG_STAGE]["realm_id"]["value"],
        }
@hookimpl
def nebari_stage() -> List[NebariStage]:
    """Nebari plugin hook: return the stage classes provided by this plugin."""
    provided_stages = [MlflowStage]
    return provided_stages