# envs/shared/jupyterhub.yaml

# Hub pod settings.
# Dollar-brace tokens in this file are template placeholders that must be
# substituted before the file is parsed as YAML (presumably by Terraform
# templatefile -- confirm).
hub:
  authenticatePrometheus: false
  db:
    pvc:
      storage: 50Gi
      storageClassName: gp3
  # Install boto3 into the hub container on every start, then launch
  # jupyterhub with the config file at the path given below.
  command:
    - sh
    - -c
    - >-
      pip install boto3 &&
      jupyterhub --config /usr/local/etc/jupyterhub/jupyterhub_config.py
  config:
    GitHubOAuthenticator:
      client_id: "${client_id}"
      client_secret: "${client_secret}"
      oauth_callback_url: "https://${jupyterhub_domain}/hub/oauth_callback"
      # OAuth scopes requested from GitHub.
      scope:
        - read:user
        - read:gist
        - user:email

# Idle-server culling: enabled, with a timeout of 3600 and a check interval
# of 300 (both presumably in seconds, per the z2jh chart docs -- confirm).
cull:
  enabled: true
  every: 300
  timeout: 3600

# Public proxy and its AWS load balancer Service.
proxy:
  https:
    enabled: true
    # NOTE(review): "offload" presumably means TLS is terminated by the load
    # balancer configured through the annotations below -- confirm.
    type: offload
    hosts:
      - "${jupyterhub_domain}"
  service:
    # Annotation values must be strings, so every value is quoted; this also
    # keeps the templated certificate ARN a string whatever it expands to.
    annotations:
      service.beta.kubernetes.io/aws-load-balancer-ssl-cert: "${ssl_cert_arn}"
      service.beta.kubernetes.io/aws-load-balancer-ssl-ports: "https"
      service.beta.kubernetes.io/aws-load-balancer-backend-protocol: "tcp"
      service.beta.kubernetes.io/aws-load-balancer-connection-idle-timeout: "3600"
      service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: "ip"
      service.beta.kubernetes.io/aws-load-balancer-scheme: "internet-facing"
      service.beta.kubernetes.io/aws-load-balancer-type: "external"
      service.beta.kubernetes.io/aws-load-balancer-cross-zone-load-balancing-enabled: "true"
      service.beta.kubernetes.io/aws-load-balancer-ip-address-type: "ipv4"

singleuser:
  # Server profiles users can choose between.
  profileList:
    # NOTE(review): the description ("0.5 CPU / 1 GB") matches neither the
    # guarantees (0.25 CPU / 0.5G) nor the limits (2 CPU / 2G) set below --
    # confirm which one is intended.
    - display_name: 'Tiny. Useful for many quick things'
      description: '0.5 CPU / 1 GB'
      profile_options:
        image:
          display_name: 'Image'
          choices:
            standard:
              display_name: 'Standard'
              default: true
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}"
      kubespawner_override:
        image_pull_policy: Always
        cpu_guarantee: 0.25
        cpu_limit: 2
        mem_guarantee: 0.5G
        mem_limit: 2G
        node_selector:
          NodePool: default
    # - display_name: "MIT Workshop (tmp)"
    #   description: "6 CPU / 16 GB up to 12C/32G. May take up to 15 mins to start."
    #   profile_options:
    #     image:
    #       display_name: "Image"
    #       choices:
    #         standard:
    #           display_name: "Standard"
    #           default: true
    #           kubespawner_override:
    #             image: "${singleuser_image_repo}:${singleuser_image_tag}"
    #         matlab:
    #           display_name: "MATLAB (must provide your own license)"
    #           kubespawner_override:
    #             image: "${singleuser_image_repo}:${singleuser_image_tag}-matlab"
    #   default: true
    #   kubespawner_override:
    #     image_pull_policy: Always
    #     cpu_limit: 12
    #     cpu_guarantee: 6
    #     mem_limit: 32G
    #     mem_guarantee: 16G
    #     node_selector:
    #       NodePool: cpu-on-demand
    # Profile marked `default: true`; offers the standard and MATLAB images.
    # Guarantees 6 CPU / 16G, limits 12 CPU / 32G.
    - display_name: 'Base'
      description: '6 CPU / 16 GB up to 12C/32G. May take up to 15 mins to start.'
      default: true
      profile_options:
        image:
          display_name: 'Image'
          choices:
            standard:
              display_name: 'Standard'
              default: true
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}"
            matlab:
              display_name: 'MATLAB (must provide your own license)'
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}-matlab"
      kubespawner_override:
        image_pull_policy: Always
        cpu_guarantee: 6
        cpu_limit: 12
        mem_guarantee: 16G
        mem_limit: 32G
        node_selector:
          NodePool: default

    # Guarantees 12 CPU / 32G, limits 24 CPU / 64G; standard or MATLAB image.
    - display_name: 'Medium'
      description: '12C/32G up to 24C/64G. May take up to 15 mins to start.'
      profile_options:
        image:
          display_name: 'Image'
          choices:
            standard:
              display_name: 'Standard'
              default: true
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}"
            matlab:
              display_name: 'MATLAB (must provide your own license)'
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}-matlab"
      kubespawner_override:
        image_pull_policy: Always
        cpu_guarantee: 12
        cpu_limit: 24
        mem_guarantee: 32G
        mem_limit: 64G
        node_selector:
          NodePool: default
    # Guarantees 24 CPU / 64G, limits 48 CPU / 96G; standard or MATLAB image.
    - display_name: 'Large'
      description: '24C/64G up to 48C/96G. May take up to 15 mins to start.'
      profile_options:
        image:
          display_name: 'Image'
          choices:
            standard:
              display_name: 'Standard'
              default: true
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}"
            matlab:
              display_name: 'MATLAB (must provide your own license)'
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}-matlab"
      kubespawner_override:
        image_pull_policy: Always
        cpu_guarantee: 24
        cpu_limit: 48
        mem_guarantee: 64G
        mem_limit: 96G
        node_selector:
          NodePool: default
    # GPU profile: requests one nvidia.com/gpu, pins pods to g4dn.2xlarge
    # nodes in the "gpu" NodePool, and tolerates the GPU and dedicated-user
    # taints. Uses the "-gpu" image variants.
    - display_name: 'T4 GPU for inference'
      description: '8 CPU / 30 GB / 1 T4 GPU. May take up to 15 mins to start.'
      profile_options:
        image:
          display_name: 'Image'
          choices:
            standard:
              display_name: 'Standard GPU'
              default: true
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}-gpu"
            matlab:
              display_name: 'MATLAB GPU (must provide your own license)'
              kubespawner_override:
                image: "${singleuser_image_repo}:${singleuser_image_tag}-gpu-matlab"
      kubespawner_override:
        image_pull_policy: Always
        cpu_guarantee: 6
        cpu_limit: 8
        mem_guarantee: 25G
        mem_limit: 31G
        extra_resource_limits:
          nvidia.com/gpu: "1"
        node_selector:
          NodePool: gpu
          node.kubernetes.io/instance-type: g4dn.2xlarge
        tolerations:
          - key: 'nvidia.com/gpu'
            operator: 'Exists'
            effect: 'NoSchedule'
          # According to optimization docs https://z2jh.jupyter.org/en/latest/administrator/optimization.html
          - key: 'hub.jupyter.org/dedicated'
            operator: 'Equal'
            value: 'user'
            effect: 'NoSchedule'
  # Default landing path for user servers.
  defaultUrl: '/lab'
  # Chart-level resource settings. Every profile in profileList also sets its
  # own cpu/mem guarantee and limit through kubespawner_override.
  cpu:
    guarantee: 0.5
    limit: 12
  memory:
    guarantee: 1G
    limit: 16G
  # Presumably seconds (2400 = 40 minutes) -- confirm against the chart docs.
  startTimeout: 2400
  storage:
    # "none": presumably disables the chart's own per-user volume (confirm
    # against the z2jh docs); all volumes are declared explicitly below.
    type: none
    extraVolumes:
      # Host FUSE device.
      - name: fuse
        hostPath:
          path: /dev/fuse
      # Memory-backed emptyDir, mounted at /dev/shm below.
      - name: shm-volume
        emptyDir:
          medium: Memory
      # Claim backing /shared and /readonly.
      - name: persistent-storage-shared
        persistentVolumeClaim:
          claimName: efs-persist-shared
      # Claim backing each user's home directory.
      - name: persistent-storage
        persistentVolumeClaim:
          claimName: efs-persist
    extraVolumeMounts:
      - name: fuse
        mountPath: /dev/fuse
      - name: shm-volume
        mountPath: /dev/shm
      # Per-user home directory: subPath home/{username} of the home claim.
      - name: persistent-storage
        mountPath: "/home/jovyan"
        subPath: "home/{username}"
      - name: persistent-storage-shared
        mountPath: "/shared"
        subPath: "shared"
      # Same claim as /shared, mounted read-only from its own subPath.
      - name: persistent-storage-shared
        mountPath: "/readonly"
        readOnly: true
        subPath: "readonly"
  initContainers:
    # Runs as root (runAsUser: 0) to set mode and ownership on the user's
    # home directory and on the shared directories.
    - name: nfs-fixer
      image: alpine
      securityContext:
        runAsUser: 0
      volumeMounts:
        - name: persistent-storage
          mountPath: /nfs
          subPath: 'home/{username}'
        # /shared and /readonly are mounted from the same volume the notebook
        # container uses for them (see storage.extraVolumeMounts), so the
        # chmod/chown below act on the directories users actually see.
        - name: persistent-storage-shared
          mountPath: /shared
          subPath: 'shared'
        - name: persistent-storage-shared
          mountPath: /readonly
          subPath: 'readonly'
      command:
        - sh
        - -c
        # Folded scalar: the lines below are joined into one shell command line.
        - >
          chmod 0775 /nfs;
          chown 1000:100 /nfs;
          chmod 0775 /shared;
          chown 1000:100 /shared;
          chmod 0555 /readonly
  # `cmd` is deliberately not set here: it is defined once (`cmd: null`) at
  # the end of this `singleuser` mapping, and a YAML mapping must not repeat
  # a key (parsers either reject the duplicate or silently keep the last one).
  lifecycleHooks:
    # After the container starts: register the "allen" conda environment as a
    # Jupyter kernel named "Python (Allen SDK)" and set the global git
    # identity from the GITHUB_EMAIL / GITHUB_USER environment variables.
    # The doubled dollar sign is a template escape so the shell receives a
    # literal variable reference (presumably Terraform templatefile -- confirm).
    postStart:
      exec:
        command:
          - "sh"
          - "-c"
          - >
            /opt/conda/envs/allen/bin/python -m ipykernel install --user --name allen --display-name "Python (Allen SDK)";
            git config --global user.email "$${GITHUB_EMAIL}";
            git config --global user.name "$${GITHUB_USER}"
  # Quoted like the other templated values in this file so the rendered
  # value is always a string.
  serviceAccountName: "${jupyter_single_user_sa_name}"
  allowPrivilegeEscalation: true
  extraPodConfig: # This is needed for Jovyan user running in every single pod, access the Service Account
    securityContext:
      fsGroup: 100
  extraEnv: # Sudo needed to configure the proper permissions to start the notebook instance
    GRANT_SUDO: "yes"
    NOTEBOOK_ARGS: "--allow-root"
    CHOWN_HOME: "yes"
    CHOWN_HOME_OPTS: "-R"
    # NOTE(review): the shared volume is mounted at /shared (see
    # storage.extraVolumeMounts), not /home/shared -- confirm this path.
    CHOWN_EXTRA: "/home/shared"
  # Start the container as root (uid 0) with fsGid 0.
  uid: 0
  fsGid: 0
  # Explicit null: no chart-level command is set (presumably so the image's
  # own default command is used -- confirm).
  cmd: null

# Scheduling optimizations, configured according to
# https://z2jh.jupyter.org/en/latest/administrator/optimization.html
scheduling:
  userScheduler:
    enabled: true
  podPriority:
    enabled: true
  # Placeholder pods are currently disabled; replicas is kept for when they
  # are switched on.
  userPlaceholder:
    # TODO(asmacdo) 4 in dandi branch
    replicas: 1
    enabled: false
  userPods:
    nodeAffinity:
      # This will force single-user pods to use a specific karpenter provisioner
      matchNodePurpose: require

# Image pre-pulling: both the hook and the continuous pre-puller are disabled.
prePuller:
  continuous:
    # TODO(asmacdo) enable this for quicker deployment
    # NOTE: if used with Karpenter, also add user-placeholders
    enabled: false
  hook:
    enabled: false

# Chart-wide settings.
global:
  # NOTE(review): presumably controls whether the chart may reveal otherwise
  # censored values in its messages -- confirm against the z2jh docs.
  safeToShowValues: false