# We define a service account that is attached by default to all Jupyter user pods
# and dask-gateway workers. By default, this has no permissions - although extra
# cloud access permissions may be granted - see docs/topic/features.md.
userServiceAccount:
enabled: true
annotations: {}
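# As an illustration only (nothing is granted by default), such permissions
# are typically wired up by annotating this service account - for example with
# GKE Workload Identity or EKS IRSA annotations. The account and role names
# below are placeholders:
#
# annotations:
#   iam.gke.io/gcp-service-account: <gsa-name>@<project-id>.iam.gserviceaccount.com
#   eks.amazonaws.com/role-arn: arn:aws:iam::<account-id>:role/<role-name>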
binderhub-service:
enabled: false
ingressBasicAuth:
enabled: false
# Primarily here for validation to 'work',
# as these are set in secret config otherwise. I don't like this,
# as we won't catch these values missing if they aren't set.
username: ""
password: ""
dex:
enabled: false
staticWebsite:
enabled: false
source:
git:
branch: main
githubAuth:
enabled: false
githubApp:
# Primarily here for validation to 'work',
# as these are set in secret config otherwise. I don't like this,
# as we won't catch these values missing if they aren't set.
id: 0
privateKey: ""
nfs:
enabled: true
dirsizeReporter:
enabled: true
shareCreator:
enabled: true
tolerations: []
pv:
enabled: true
mountOptions:
- soft
- noatime
- vers=4.2
# Use NFS provided by an in cluster server with the nfs-external-provisioner chart
inClusterNFS:
enabled: false
size: 100Gi
# A placeholder, as global values (which can be referenced from the same
# location by any chart) should be possible to provide, but aren't necessarily
# provided or used.
global: {}
jupyterhub:
cull:
# Don't allow any user pods to run for longer than 7 days by default
maxAge: 604800 # 7 days in seconds
custom:
auth:
anonymizeUsername: false
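# A minimal sketch of turning this on (see the 06-salted-username snippet in
# hub.extraConfig below): it assumes CILogon-provided auth_state and a
# USERNAME_DERIVATION_PEPPER environment variable set on the hub pod.
#
# auth:
#   anonymizeUsername: true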
singleuser:
extraPVCs: []
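# An illustrative sketch of the entries consumed by the 05-per-user-disk
# snippet in hub.extraConfig below - each entry needs name, class and
# capacity keys (values here are placeholders):
#
# extraPVCs:
#   - name: "db-{username}"
#     class: standard
#     capacity: 1G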
singleuserAdmin:
extraEnv: {}
extraVolumeMounts:
- name: home
mountPath: /home/jovyan/shared-readwrite
subPath: _shared
- name: home
mountPath: /home/rstudio/shared-readwrite
subPath: _shared
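# The 02-basehub-spawner snippet in hub.extraConfig below also honors an
# optional serviceAccountName for admin users' servers; a hypothetical example:
#
# serviceAccountName: admin-user-sa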
cloudResources:
provider: ""
gcp:
projectId: ""
scratchBucket:
enabled: false
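# A minimal sketch of enabling a GCP scratch bucket, as consumed by the
# 03-cloud-storage-bucket snippet in hub.extraConfig below (the project id
# is a placeholder):
#
# provider: gcp
# gcp:
#   projectId: my-gcp-project
# scratchBucket:
#   enabled: true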
2i2c:
# Should 2i2c engineering staff user IDs be injected into the admin_users
# configuration of the JupyterHub's authenticator by our custom
# jupyterhub_config.py snippet as declared in hub.extraConfig?
add_staff_user_ids_to_admin_users: false
add_staff_user_ids_of_type: ""
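# An illustrative example of enabling this - add_staff_user_ids_of_type
# selects which of the lists below (staff_github_ids or staff_google_ids)
# gets injected by the 04-2i2c-add-staff-user-ids-to-admin-users snippet:
#
# add_staff_user_ids_to_admin_users: true
# add_staff_user_ids_of_type: "github"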
staff_github_ids:
- AIDEA775
- choldgraf
- colliand
- consideRatio
- damianavila
- GeorgianaElena
- jmunroe
- jnywong
- sgibson91
- yuvipanda
staff_google_ids:
- asilva@2i2c.org
- choldgraf@2i2c.org
- colliand@2i2c.org
- damianavila@2i2c.org
- erik@2i2c.org
- georgianaelena@2i2c.org
- jmunroe@2i2c.org
- jwong@2i2c.org
- sgibson@2i2c.org
- yuvipanda@2i2c.org
homepage:
gitRepoUrl: "https://github.com/2i2c-org/default-hub-homepage"
# TODO: make main the default branch in the repo above
gitRepoBranch: "master"
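# The 01-custom-theme snippet in hub.extraConfig below passes
# custom.homepage.templateVars to the JupyterHub templates as the `custom`
# template variable. Keys are hub specific; the ones below are purely
# illustrative placeholders:
#
# templateVars:
#   org:
#     name: Example Community
#     url: https://example.org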
jupyterhubConfigurator:
enabled: true
ingress:
enabled: true
ingressClassName: nginx
annotations:
nginx.ingress.kubernetes.io/proxy-body-size: 256m
cert-manager.io/cluster-issuer: letsencrypt-prod
scheduling:
# We declare matchNodePurpose=require to get a nodeAffinity that acts like a
# nodeSelector on all core pods and user pods. Core pods like hub and proxy
# will schedule on nodes with hub.jupyter.org/node-purpose=core and user
# pods on nodes with hub.jupyter.org/node-purpose=user.
#
# Since this setting adds a nodeAffinity, it's okay that we configure
# KubeSpawner's profile_list to override node_selector.
#
corePods:
nodeAffinity:
matchNodePurpose: require
userPods:
nodeAffinity:
matchNodePurpose: require
podPriority:
enabled: true
userPlaceholder:
enabled: true
replicas: 0
userScheduler:
enabled: false
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 64Mi
limits:
memory: 1G
# prePuller is about pulling one or more images identified via chart
# configuration, including singleuser.image and singleuser.profileList entries
# with a dedicated image, but not profileList entries with images specified
# via profile_options.
prePuller:
# continuous prePuller leads to the creation of a DaemonSet that starts a
# pod on each node to pull images.
#
# It is disabled as it's only relevant for nodes started before user pods
# get scheduled on them; in other cases it could delay startup and isn't
# expected to reduce startup times.
#
continuous:
enabled: false
# hook prePuller leads to the creation of a temporary DaemonSet and a pod
# awaiting pulling to complete before `helm upgrade` starts its main work.
#
# It is disabled as it adds notable complexity for a comparatively small
# benefit even when correctly adopted. The added complexity includes:
#
# - risk of misconfiguration leading to image pulls that aren't actually needed
# - risk of broken expectations and additional cognitive load
# - risk of causing significantly longer `helm upgrade` commands slowing
# down our CI system
# - ClusterRoleBinding resources are needed for the image-awaiter Pod
# involved, a resource that requires the highest k8s cluster permission
# otherwise possibly not needed to deploy basehub
#
hook:
enabled: false
proxy:
service:
type: ClusterIP
chp:
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 64Mi
limits:
memory: 1Gi
traefik:
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
# Note: if autohttps pods aren't used anywhere by our basehub
# deployments, we should simply remove this traefik configuration.
#
resources:
requests:
memory: 64Mi
limits:
memory: 1Gi
singleuser:
# Need to explicitly fix ownership here, as otherwise these directories will be owned
# by root on most NFS filesystems - neither EFS nor Google Filestore supports anonuid
initContainers:
- name: volume-mount-ownership-fix
image: busybox:1.36.1
command:
- sh
- -c
- id && chown 1000:1000 /home/jovyan /home/jovyan/shared && ls -lhd /home/jovyan
securityContext:
runAsUser: 0
volumeMounts:
- name: home
mountPath: /home/jovyan
subPath: "{username}"
# Mounted without readonly attribute here,
# so we can chown it appropriately
- name: home
mountPath: /home/jovyan/shared
subPath: _shared
cmd:
# Explicitly define this, as it's no longer set by z2jh
# https://github.com/jupyterhub/zero-to-jupyterhub-k8s/pull/2449
- jupyterhub-singleuser
extraEnv:
# notebook writes secure files that don't need to survive a
# restart here. Writing 'secure' files on some file systems (like
# Azure Files with SMB) seems buggy, so we just put runtime dir on
# /tmp. This is ok in our case, since no two users are on the same
# container.
JUPYTER_RUNTIME_DIR: /tmp/.jupyter-runtime
# By default, /bin/sh is used as shell for terminals, not /bin/bash
# Most people do not expect this, so let's match expectation
SHELL: /bin/bash
extraFiles:
ipython_kernel_config.json:
mountPath: /usr/local/etc/ipython/ipython_kernel_config.json
data:
# This keeps a history of all executed code under $HOME, which is almost always on
# NFS. This file is kept as a sqlite file, and sqlite and NFS do not go together very
# well! Disable this to save ourselves from debugging random NFS oddities that are caused
# by this unholy sqlite + NFS mixture.
HistoryManager:
enabled: false
# jupyter_server and notebook are different jupyter servers providing
# similar configuration options. Since we have user images that may
# provide either, we provide the same configuration for both via
# jupyter_server_config.json and jupyter_notebook_config.json.
#
# A hub can force a choice with singleuser.extraEnv via:
#
# JUPYTERHUB_SINGLEUSER_APP: "notebook.notebookapp.NotebookApp"
# JUPYTERHUB_SINGLEUSER_APP: "jupyter_server.serverapp.ServerApp"
#
jupyter_server_config.json:
mountPath: /usr/local/etc/jupyter/jupyter_server_config.json
# if a user leaves a notebook with a running kernel,
# the effective idle timeout will typically be cull idle timeout
# of the server + the cull idle timeout of the kernel,
# as culling the kernel will register activity,
# resetting the no_activity timer for the server as a whole
data:
# Allow JupyterLab to show the 'View -> Show Hidden Files' option
# in the menu. Defaults are not changed.
# https://github.com/jupyterlab/jupyterlab/issues/11304#issuecomment-945466766
ContentsManager:
allow_hidden: true
# MappingKernelManager configuration reference:
# https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.services.kernels.html#jupyter_server.services.kernels.kernelmanager.MappingKernelManager
#
MappingKernelManager: &server_config_mapping_kernel_manager
cull_idle_timeout: 3600
cull_interval: 300
cull_connected: true
# ServerApp configuration reference:
# https://jupyter-server.readthedocs.io/en/latest/api/jupyter_server.html#jupyter_server.serverapp.ServerApp
#
ServerApp: &server_config_server_app
extra_template_paths:
- /usr/local/share/jupyter/custom_template
# Move the sqlite file used by https://github.com/jupyter-server/jupyter_server_fileid
# off the default path, which is under ~/.local/share/jupyter.
# That is NFS, and sqlite + NFS don't go well together. In addition,
# it uses WAL mode of sqlite, and that is completely unsupported on NFS
# Upstream discussion in https://github.com/jupyter-server/jupyter_server_fileid/issues/60.
BaseFileIdManager: &server_config_base_file_id_manager
db_path: /tmp/file_id_manager.db
jupyter_notebook_config.json:
mountPath: /usr/local/etc/jupyter/jupyter_notebook_config.json
data:
MappingKernelManager: *server_config_mapping_kernel_manager
NotebookApp: *server_config_server_app
BaseFileIdManager: *server_config_base_file_id_manager
startTimeout: 600 # 10 mins, node startup + image pulling sometimes takes more than the default 5min
defaultUrl: /tree
image:
name: jupyter/scipy-notebook
tag: "2023-06-19"
storage:
type: static
static:
pvcName: home-nfs
subPath: "{username}"
extraVolumeMounts:
- name: home
mountPath: /home/jovyan/shared
subPath: _shared
readOnly: true
# For all pods, mount home in both /home/jovyan (done via singleuser.storage.static)
# as well as /home/rstudio. This allows rocker images (which use the
# rstudio user and put home in /home/rstudio) to be first class citizens
# along with jupyter based images, regardless of how they are specified
# (via the configurator, with unlisted_choice, or as a profile). For non-rocker
# images, this is just invisible in the UI and there is no performance overhead
# for these extra bind mounts. An additional positive here is that in case *students*
# end up accidentally hardcoding paths in their notebooks, those will continue to work
# regardless of whether they are on RStudio or JupyterLab (described to us as a serious
# problem by openscapes)
- name: home
mountPath: /home/rstudio
subPath: "{username}"
- name: home
mountPath: /home/rstudio/shared
subPath: _shared
readOnly: true
memory:
guarantee: 256M
limit: 1G
cpu:
# If no CPU limit is set, it is possible for a single user or group of users to
# starve everyone else of CPU time on a node, even causing new user pods to completely
# fail as the notebook server process gets no CPU to complete auth handshake with
# the server, and even trivial cells like `print("hello world")` may not run.
# Unlike memory guarantees, CPU guarantees are actually enforced by the Linux Kernel
# (see https://medium.com/@betz.mark/understanding-resource-limits-in-kubernetes-cpu-time-9eff74d3161b)
# By giving each user a 5% CPU guarantee (represented by 0.05), we ensure that:
# 1. Simple cells will always execute
# 2. Notebook server processes will always start - so users won't see server spawn failures
# 3. We don't accidentally set just a high limit for a particular hub and not set a
# guarantee, at which point kubernetes treats the limit as the guarantee! This causes
# far more nodes to be scaled up than needed, making everything super slow (like in
# https://github.com/2i2c-org/infrastructure/issues/790)
# 4. Most of our workloads are still memory bound, and we want scaling to happen only
# when a node is full on its memory guarantees. But a 0.05 guarantee means a n1-highmem-8
# node can fit 160 user pods, and since kubernetes already caps us at 100 pods a node,
# this guarantee doesn't actually change our scheduling.
guarantee: 0.05
networkPolicy:
# Allow unrestricted access to the internet but not local cluster network
enabled: true
egress:
- to:
- ipBlock:
cidr: 0.0.0.0/0
except:
# Don't allow network access to private IP ranges
# Listed in https://datatracker.ietf.org/doc/html/rfc1918
- 10.0.0.0/8
- 172.16.0.0/12
- 192.168.0.0/16
# Don't allow network access to the metadata IP
- 169.254.169.254/32
# Allow code in hubs to talk to ingress provider, so they can talk to
# the hub via its public URL
- to:
- namespaceSelector:
matchLabels:
name: support
podSelector:
matchLabels:
app.kubernetes.io/name: ingress-nginx
# If a hub is using autohttps instead of ingress-nginx, allow traffic
# to the autohttps pod as well
- to:
- podSelector:
matchLabels:
app: jupyterhub
component: autohttps
# Allow traffic to the proxy pod from user pods
# This is particularly important for daskhubs that utilise the proxy
# in order to create clusters (schedulers and workers)
- to:
- podSelector:
matchLabels:
app: jupyterhub
component: proxy
# Allow traffic to the traefik pod from user pods. Needed for daskhubs.
- to:
- podSelector:
matchLabels:
app.kubernetes.io/component: traefik
hub:
loadRoles:
# Should use this, not hub.config.JupyterHub.load_roles - that will
# override any existing load_roles set by z2jh
service-use:
name: user
scopes:
# Allow all users access to 'services', which includes dask-gateway & configurator
- access:services
- self
config:
JupyterHub:
# Allow unauthenticated prometheus requests
# Otherwise our prometheus server can't get hub metrics
authenticate_prometheus: false
KubeSpawner:
# Make sure working directory is where we mount the home folder
working_dir: /home/jovyan
# Increase timeout for Jupyter server to become 'ready', until
# https://github.com/2i2c-org/infrastructure/issues/2047 is fixed
http_timeout: 120
Authenticator:
# Don't allow the test username to log in to the hub.
# The test service will still be able to create this hub username
# and start its server.
# Ref: https://github.com/2i2c-org/meta/issues/321
blocked_users:
- deployment-service-check
extraFiles:
configurator-schema-default:
mountPath: /usr/local/etc/jupyterhub-configurator/00-default.schema.json
data:
type: object
name: config
properties:
KubeSpawner.image:
type: string
title: User docker image
description: Determines languages, libraries and interfaces available
help: Leave this blank to use the default
Spawner.default_url:
type: string
title: Default User Interface
enum:
- "/tree"
- "/lab"
- "/rstudio"
default: "/tree"
enumMetadata:
interfaces:
- value: "/tree"
title: Classic Notebook
description: >-
The original single-document interface for creating
Jupyter Notebooks.
- value: "/lab"
title: JupyterLab
description: A powerful next generation notebook interface
- value: "/rstudio"
title: RStudio
description: An IDE for R, created by the RStudio company
extraEnv:
BASEHUB_K8S_DIST:
valueFrom:
configMapKeyRef:
name: basehub-cluster-info
key: K8S_DIST
initContainers:
- name: templates-clone
image: alpine/git:2.40.1
command:
- /bin/sh
args:
- -c
# Remove the existing repo first if it exists, as otherwise we will
# error out when the pod restarts. /srv/extra-templates-dir is an
# emptyDir volume, so it is *not* cleaned up when the pod's containers restart -
# only when the pod itself is deleted and cleaned up.
# We also mount the emptyDir in `/srv/extra-templates-dir` but
# clone into a *subdirectory*, as the mount itself is owned by
# root, and git freaks out when that is the case. By putting
# the repo in a sub directory, we avoid the permission problems.
- |
rm -rf /srv/extra-templates-dir/repo;
git clone ${GIT_REPO_URL} /srv/extra-templates-dir/repo
env:
- name: GIT_REPO_URL
valueFrom:
configMapKeyRef:
name: hub-custom-templates-config
key: GIT_REPO_URL
securityContext:
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumeMounts:
- name: custom-templates
mountPath: /srv/extra-templates-dir
extraContainers:
- name: templates-sync
image: alpine/git:2.40.1
workingDir: /srv/extra-templates-dir/repo
command:
- /bin/sh
args:
- -c
- |
handle_sigterm() {
echo "SIGTERM received, terminating...";
exit;
}
trap handle_sigterm SIGTERM;
echo "Starting template sync...";
echo "";
echo "Info about local git repo to be synced:";
(
# set -x causes commands run to be printed, helping log readers
# understand what the generated output is about. set -x is
# configured within a subshell to just print info about the
# specific chosen commands and avoid printing info about running
# "echo", "sleep", "set +x", or similar commands.
set -x;
git remote -v;
ls -lhd /srv/extra-templates-dir/repo;
)
echo "";
echo "Syncing local git repo /srv/extra-templates-dir/repo against origin's branch $(GIT_REPO_BRANCH) every 5 minutes...";
while true; do
git fetch origin;
git reset --hard origin/$(GIT_REPO_BRANCH);
# signal handling can only be done between sleep calls, so this
# shouldn't be reduced to the otherwise equivalent "sleep 5m"
for i in $(seq 300); do
sleep 1s;
done
done
env:
- name: GIT_REPO_BRANCH
valueFrom:
configMapKeyRef:
name: hub-custom-templates-config
key: GIT_REPO_BRANCH
securityContext:
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
readOnlyRootFilesystem: true
volumeMounts:
- name: custom-templates
mountPath: /srv/extra-templates-dir
extraVolumes:
- name: custom-templates
emptyDir: {}
extraVolumeMounts:
- mountPath: /usr/local/share/jupyterhub/custom_templates
name: custom-templates
subPath: repo/templates
- mountPath: /usr/local/share/jupyterhub/static/extra-assets
name: custom-templates
subPath: repo/extra-assets
services:
# hub-health service helps us run health checks from the deployer script.
# The JupyterHub Helm chart will automatically generate an API token for
# services and expose it in a k8s Secret named `hub`. When we run health
# tests against a hub, we read this token from the k8s Secret to acquire
# the credentials needed to interact with the JupyterHub API.
#
hub-health:
# FIXME: With JupyterHub 2 we can define a role for this service with
# more tightly scoped permissions based on our needs.
#
admin: true
image:
name: quay.io/2i2c/pilot-hub
tag: "0.0.1-0.dev.git.7670.hfd1b116d"
networkPolicy:
enabled: true
# interNamespaceAccessLabels=accept makes the hub pod's associated
# NetworkPolicy accept ingress from pods in other namespaces that have a
# hub.jupyter.org/network-access-hub=true label.
#
# ref: https://z2jh.jupyter.org/en/stable/resources/reference.html#hub-networkpolicy-internamespaceaccesslabels
#
interNamespaceAccessLabels: accept
ingress:
- from:
- podSelector:
matchLabels:
app: jupyterhub
component: hub
ports:
- port: 8081
protocol: TCP
# The jupyterhub-configurator is a managed jupyterhub service, which
# means it is started by jupyterhub as a separate process in the hub
# pod. Users will access it via the proxy pod, and JupyterHub itself
# accesses it via localhost. This rule accepts such requests on
# port 10101 from these destinations.
#
# Maybe the container-internal rule, for jupyterhub ->
# jupyterhub-configurator, isn't needed, as the request goes directly to
# 127.0.0.1:10101.
#
# ref: The extraConfig.02-basehub-spawner section below
# ref: https://github.com/yuvipanda/jupyterhub-configurator/blob/996405d2a7017153d5abe592b8028fed7a1801bb/jupyterhub_configurator/mixins.py#L7C5-L11
#
- from:
- podSelector:
matchLabels:
app: jupyterhub
component: proxy
- podSelector:
matchLabels:
app: jupyterhub
component: hub
ports:
- port: 10101
protocol: TCP
# FIXME: We should think about these resource requests/limits, see
# https://github.com/2i2c-org/infrastructure/issues/2127.
#
resources:
requests:
cpu: 0.01
memory: 128Mi
limits:
memory: 2Gi
extraConfig:
01-custom-theme: |
# adds a JupyterHub template path and updates template variables
from z2jh import get_config
c.JupyterHub.template_paths.insert(0,'/usr/local/share/jupyterhub/custom_templates')
c.JupyterHub.template_vars.update({
'custom': get_config('custom.homepage.templateVars')
})
02-basehub-spawner: |
# Updates JupyterHub.spawner_class and KubeSpawner.modify_pod_hook to
# handle features introduced by the basehub chart, specifically those
# configured via:
#
# jupyterhub.custom.singleuserAdmin
# jupyterhub.custom.jupyterhubConfigurator
#
from jupyterhub_configurator.mixins import ConfiguratorSpawnerMixin
from kubernetes_asyncio.client.models import V1VolumeMount
from kubespawner import KubeSpawner
from kubespawner.utils import get_k8s_model
from z2jh import get_config
# Setup jupyterhub-configurator only if its enabled
spawner_base_classes = [KubeSpawner]
if get_config("custom.jupyterhubConfigurator.enabled"):
spawner_base_classes = [ConfiguratorSpawnerMixin, KubeSpawner]
jhc_service = {
"name": "configurator",
"url": "http://configurator:10101",
"oauth_no_confirm": True,
"command": [
"python3",
"-m",
"jupyterhub_configurator.app",
"--Configurator.config_file=/usr/local/etc/jupyterhub-configurator/jupyterhub_configurator_config.py",
],
}
c.JupyterHub.services.append(jhc_service)
class BaseHubSpawner(*spawner_base_classes):
def start(self, *args, **kwargs):
"""
Modify admin users' spawners' non-list config based on
`jupyterhub.custom.singleuserAdmin`.
The list config is handled separately by the
`modify_pod_hook`.
"""
custom_admin = get_config('custom.singleuserAdmin', {})
if not (self.user.admin and custom_admin):
return super().start(*args, **kwargs)
admin_environment = custom_admin.get('extraEnv', {})
self.environment.update(admin_environment)
admin_service_account = custom_admin.get('serviceAccountName')
if admin_service_account:
self.service_account = admin_service_account
return super().start(*args, **kwargs)
c.JupyterHub.spawner_class = BaseHubSpawner
def modify_pod_hook(spawner, pod):
"""
Modify admin user's pod manifests based on *list* config under
`jupyterhub.custom.singleuserAdmin`.
This hook is required to ensure that list config under
`jupyterhub.custom.singleuserAdmin` is appended and not just
overridden when a profile_list entry has a kubespawner_override
modifying the same config.
"""
custom_admin = get_config('custom.singleuserAdmin', {})
if not (spawner.user.admin and custom_admin):
return pod
for c in pod.spec.containers:
if c.name == "notebook":
notebook_container = c
break
else:
raise Exception("No container named 'notebook' found in pod definition")
admin_volume_mounts = custom_admin.get('extraVolumeMounts', [])
notebook_container.volume_mounts += [get_k8s_model(V1VolumeMount, obj) for obj in (admin_volume_mounts)]
return pod
c.KubeSpawner.modify_pod_hook = modify_pod_hook
03-cloud-storage-bucket: |
from z2jh import get_config
cloud_resources = get_config('custom.cloudResources')
scratch_bucket = cloud_resources['scratchBucket']
import os
if scratch_bucket['enabled']:
# FIXME: Support other providers too
assert cloud_resources['provider'] == 'gcp'
project_id = cloud_resources['gcp']['projectId']
release = os.environ['HELM_RELEASE_NAME']
bucket_protocol = 'gcs'
bucket_name = f'{project_id}-{release}-scratch-bucket'
env = {
'SCRATCH_BUCKET_PROTOCOL': bucket_protocol,
# Matches the "daskhub.scratchBucket.name" helm template
'SCRATCH_BUCKET_NAME': bucket_name,
# Use k8s syntax of $(ENV_VAR) to substitute env vars dynamically in other env vars
'SCRATCH_BUCKET': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
'PANGEO_SCRATCH': f'{bucket_protocol}://{bucket_name}/$(JUPYTERHUB_USER)',
}
c.KubeSpawner.environment.update(env)
04-2i2c-add-staff-user-ids-to-admin-users: |
from z2jh import get_config
add_staff_user_ids_to_admin_users = get_config("custom.2i2c.add_staff_user_ids_to_admin_users", False)
if add_staff_user_ids_to_admin_users:
user_id_type = get_config("custom.2i2c.add_staff_user_ids_of_type")
staff_user_ids = get_config(f"custom.2i2c.staff_{user_id_type}_ids", [])
# `c.Authenticator.admin_users` can contain additional admins, can be an empty list,
# or it cannot be defined at all.
# This should cover all these cases.
staff_user_ids.extend(get_config("hub.config.Authenticator.admin_users", []))
c.Authenticator.admin_users = staff_user_ids
05-per-user-disk: |
# Optionally, create a PVC per user - useful for per-user databases
from jupyterhub.utils import exponential_backoff
from z2jh import get_config
from kubespawner.objects import make_pvc
from functools import partial
def make_extra_pvc(component, name_template, storage_class, storage_capacity, spawner):
"""
Create a PVC object with given spec
"""
labels = spawner._build_common_labels({})
labels.update({
'component': component
})
annotations = spawner._build_common_annotations({})
storage_selector = spawner._expand_all(spawner.storage_selector)
return make_pvc(
name=spawner._expand_all(name_template),
storage_class=storage_class,
access_modes=['ReadWriteOnce'],
selector={},
storage=storage_capacity,
labels=labels,
annotations=annotations
)
extra_user_pvcs = get_config('custom.singleuser.extraPVCs', {})
if extra_user_pvcs:
make_db_pvc = partial(make_extra_pvc, 'db-storage', 'db-{username}', 'standard', '1G')
pvc_makers = [partial(
make_extra_pvc,
"extra-pvc",
p["name"],
p["class"],
p["capacity"]
) for p in extra_user_pvcs]
async def ensure_db_pvc(spawner):
"""
Ensure a PVC is created for this user's database volume
"""
for pvc_maker in pvc_makers:
pvc = pvc_maker(spawner)
# If there's a timeout, just let it propagate
await exponential_backoff(
partial(spawner._make_create_pvc_request, pvc, spawner.k8s_api_request_timeout),
f'Could not create pvc {pvc.metadata.name}',
# Each req should be given k8s_api_request_timeout seconds.
timeout=spawner.k8s_api_request_retry_timeout
)
c.Spawner.pre_spawn_hook = ensure_db_pvc
05-gh-teams: |
# Re-assigns c.KubeSpawner.profile_list to a callable that filters the
# initial configuration of profile_list based on the user's github
# org/team membership as declared via "allowed_teams" read from
# profile_list profiles.
#
# This only has effect if:
#
# - GitHubOAuthenticator is used.
# - GitHubOAuthenticator.populate_teams_in_auth_state is True, which
# requires Authenticator.enable_auth_state to be True as well.
# - The user is a normal user, and not "deployment-service-check".
#
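# As an illustration (not part of this chart's defaults), a profileList
# entry gating access would carry an allowed_teams key listing
# "<github-org>" and/or "<github-org>:<team-name>" strings, e.g.:
#
#   profileList:
#     - display_name: "Example profile"
#       allowed_teams:
#         - example-org
#         - example-org:example-team
#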
import copy
from textwrap import dedent
from tornado import web
from oauthenticator.github import GitHubOAuthenticator
original_profile_list = c.KubeSpawner.profile_list
async def profile_list_allowed_teams_filter(spawner):
"""
Returns the initially configured profile_list filtered based on the
user's membership in each profile's `allowed_teams`. If
`allowed_teams` isn't set for a profile, it's not filtered out.
`allowed_teams` is a list of GitHub organizations and/or teams
specified with `<github-org>` or `<github-org>:<team-name>` strings.
If the returned profile_list ends up not including any profile,
an error is raised and the user isn't allowed to start a server.
"""
# Ensure GitHubOAuthenticator with populate_teams_in_auth_state set
if not isinstance(spawner.authenticator, GitHubOAuthenticator):
return original_profile_list
if not spawner.authenticator.populate_teams_in_auth_state:
return original_profile_list
if spawner.user.name == "deployment-service-check":
print("Ignoring allowed_teams check for deployment-service-check")
return original_profile_list
# Ensure auth_state is populated with teams info
auth_state = await spawner.user.get_auth_state()
if not auth_state or "teams" not in auth_state:
print(f"User {spawner.user.name} does not have any auth_state set")
raise web.HTTPError(403)
# Format user's teams in auth_state to "org:team"
teams = set([f'{team["organization"]["login"]}:{team["slug"]}' for team in auth_state["teams"]])
print(f"User {spawner.user.name} is part of teams {' '.join(teams)}")
# Filter out profiles with allowed_teams set if the user isn't part
# of any.
allowed_profiles = []
for profile in copy.deepcopy(original_profile_list):
allowed_teams = profile.get("allowed_teams")
if allowed_teams is None:
allowed_profiles.append(profile)
continue
# allowed_teams can be "org" or "org:team", and we check
# membership just in time for orgs if needed
allowed_orgs = set([o for o in allowed_teams if ':' not in o])
allowed_teams = set([t for t in allowed_teams if ':' in t])
if allowed_teams & teams:
print(f"Allowing profile {profile['display_name']} for user {spawner.user.name} based on team membership")
allowed_profiles.append(profile)
continue
if "token_response" in auth_state:
access_token = auth_state["token_response"]["access_token"]
token_type = auth_state["token_response"]["token_type"]
else:
# token_response was introduced to auth_state in
# oauthenticator 16, so this is adjusting to an auth_state
# set by oauthenticator 15
access_token = auth_state["access_token"]
token_type = "token"
for allowed_org in allowed_orgs:
user_in_allowed_org = await spawner.authenticator._check_membership_allowed_organizations(
allowed_org, spawner.user.name, access_token, token_type
)
if user_in_allowed_org:
print(f"Allowing profile {profile['display_name']} for user {spawner.user.name} based on org membership")
allowed_profiles.append(profile)
break
if len(allowed_profiles) == 0:
# If no profiles are allowed, user should not be able to spawn anything!
# If we don't explicitly stop this, the user will be launched with the 'default' settings
# set in singleuser, without any profile overrides. Not desired behavior.
# FIXME: User doesn't actually see this error message, just the generic 403.
error_msg = dedent(f"""
Your GitHub team membership is insufficient to launch any server profiles.
GitHub teams you are a member of that this JupyterHub knows about are {', '.join(teams)}.
If you are part of additional teams, log out of this JupyterHub and log back in to refresh that information.
""")
raise web.HTTPError(403, error_msg)
return allowed_profiles
# Only set this customized profile_list *if* we already have a profile_list set
# otherwise, we'll show users a blank server options form and they won't be able to
# start their server
if c.KubeSpawner.profile_list:
# Customize list of profiles dynamically, rather than override options form.
# This is more secure, as users can't override the options available to them via the hub API
c.KubeSpawner.profile_list = profile_list_allowed_teams_filter
06-salted-username: |
# Allow anonymizing username to not store *any* PII
import json
import os
import base64
import hashlib
from z2jh import get_config
def salt_username(authenticator, handler, auth_model):
# Combine parts of user info with different provenances to eliminate
# possible de-anonymization attacks when things get leaked.
# FIXME: Provide useful error message when using an auth provider that
# doesn't give us 'oidc'
# FIXME: Raise error if this is attempted to be used with anything other than CILogon
USERNAME_DERIVATION_PEPPER = bytes.fromhex(os.environ['USERNAME_DERIVATION_PEPPER'])
cilogon_user = auth_model['auth_state']['cilogon_user']
user_key_parts = {
# Opaque ID from CILogon
"sub": cilogon_user['sub'],
# Combined together, opaque ID from upstream IDP (GitHub, Google, etc)
"idp": cilogon_user['idp'],
"oidc": cilogon_user['oidc']
}
# Use JSON here, so we don't have to deal with picking a string
# delimiter that will not appear in any of the parts.
# keys are sorted to ensure stable output over time
user_key = json.dumps(user_key_parts, sort_keys=True).encode('utf-8')
# The cryptographic choices made here are:
# - Use blake2, because it's fairly modern
# - Set blake2 to produce 32 bytes of output, which is good enough for our use case
# - Use base32 encoding, as it will produce a maximum of 56 characters
# for 32 bytes output by blake2. We have 63 character username
# limits in many parts of our code (particularly, in usernames
# being part of labels in kubernetes pods), so this helps
# - Convert everything to lowercase, as base64.b32encode produces
# all uppercase characters by default. Our usernames are preferably
# lowercase, as uppercase characters must be encoded for kubernetes'
# sake
# - strip the = padding provided by base64.b32encode. This is present
# primarily to be able to determine length of the original byte
# sequence accurately. We don't care about that here. Also = is
# encoded in kubernetes and puts us over the 63 char limit.
# - Use blake2 here explicitly as a keyed hash, rather than use
# hmac. This is the canonical way to do this, and helps make it
# clearer that we want it to output 32byte hashes. We could have
# used a 16byte hash here for shorter usernames, but it is unclear
# what that does to the security properties. So better safe than
# sorry, and stick to 32bytes (rather than the default 64)
digested_user_key = base64.b32encode(hashlib.blake2b(
user_key,
key=USERNAME_DERIVATION_PEPPER,
digest_size=32
).digest()).decode('utf-8').lower().rstrip("=")
# Replace the default name with our digested name, thus
# discarding the default name
auth_model["name"] = digested_user_key
return auth_model
if get_config('custom.auth.anonymizeUsername', False):
# https://jupyterhub.readthedocs.io/en/stable/reference/api/auth.html#jupyterhub.auth.Authenticator.post_auth_hook
c.Authenticator.post_auth_hook = salt_username