diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000000..e7cf222854 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,11 @@ +version: 2 + +conda: + environment: docs/environment.yml + +build: + image: latest + +python: + version: 3.8 + install: [] diff --git a/config/hubs/2i2c.cluster.yaml b/config/hubs/2i2c.cluster.yaml index 26c7fbc917..234f5cc306 100644 --- a/config/hubs/2i2c.cluster.yaml +++ b/config/hubs/2i2c.cluster.yaml @@ -6,6 +6,16 @@ gcp: project: two-eye-two-see cluster: pilot-hubs-cluster zone: us-central1-b +support: + config: + grafana: + ingress: + hosts: + - grafana.pilot.2i2c.cloud + tls: + - secretName: grafana-tls + hosts: + - grafana.pilot.2i2c.cloud hubs: - name: staging domain: staging.pilot.2i2c.cloud diff --git a/config/hubs/meom-ige.cluster.yaml b/config/hubs/meom-ige.cluster.yaml new file mode 100644 index 0000000000..cc2af84a7f --- /dev/null +++ b/config/hubs/meom-ige.cluster.yaml @@ -0,0 +1,144 @@ +name: meom-ige +provider: gcp +gcp: + key: secrets/meom.json + project: meom-ige-cnrs + cluster: meom-ige-cluster + zone: us-central1-b +hubs: + - name: staging + domain: staging.meom-ige.2i2c.cloud + template: daskhub + auth0: + connection: github + config: &meomConfig + basehub: + nfsPVC: + nfs: + # from https://docs.aws.amazon.com/efs/latest/ug/mounting-fs-nfs-mount-settings.html + mountOptions: + - rsize=1048576 + - wsize=1048576 + - timeo=600 + - soft # We pick soft over hard, so NFS lockups don't lead to hung processes + - retrans=2 + - noresvport + serverIP: nfs-server-01 + baseShareName: /export/home-01/homes/ + jupyterhub: + custom: + homepage: + templateVars: + org: + name: "SWOT Ocean Pangeo Team" + logo_url: https://2i2c.org/media/logo.png + url: https://meom-group.github.io/ + designed_by: + name: 2i2c + url: https://2i2c.org + operated_by: + name: 2i2c + url: https://2i2c.org + funded_by: + name: SWOT Ocean Pangeo Team + url: https://meom-group.github.io/ + singleuser: + extraEnv: + DATA_BUCKET: gcs://meom-ige-data + SCRATCH_BUCKET: 'gcs://meom-ige-scratch/$(JUPYTERHUB_USER)' + profileList: + # The mem-guarantees are here so k8s doesn't schedule other pods + # on these nodes. They need to be just under total allocatable + # RAM on a node, not total node capacity + - display_name: "Small" + description: "~2 CPU, ~8G RAM" + kubespawner_override: + mem_limit: 8G + mem_guarantee: 5.5G + node_selector: + node.kubernetes.io/instance-type: e2-standard-2 + - display_name: "Medium" + description: "~8 CPU, ~32G RAM" + kubespawner_override: + mem_limit: 32G + mem_guarantee: 25G + node_selector: + node.kubernetes.io/instance-type: e2-standard-8 + - display_name: "Large" + description: "~16 CPU, ~64G RAM" + kubespawner_override: + mem_limit: 64G + mem_guarantee: 55G + node_selector: + node.kubernetes.io/instance-type: e2-standard-16 + - display_name: "Very Large" + description: "~32 CPU, ~128G RAM" + kubespawner_override: + mem_limit: 128G + mem_guarantee: 115G + node_selector: + node.kubernetes.io/instance-type: e2-standard-32 + - display_name: "Huge" + description: "~64 CPU, ~256G RAM" + kubespawner_override: + mem_limit: 256G + mem_guarantee: 230G + node_selector: + node.kubernetes.io/instance-type: n2-standard-64 + defaultUrl: /lab + image: + name: pangeo/pangeo-notebook + tag: 2021.02.19 + scheduling: + userPlaceholder: + enabled: false + replicas: 0 + userScheduler: + enabled: false + proxy: + service: + type: LoadBalancer + https: + enabled: true + chp: + resources: + requests: + # FIXME: We want no guarantees here!!! 
+ # This is lowest possible value + cpu: 0.01 + memory: 1Mi + hub: + resources: + requests: + # FIXME: We want no guarantees here!!! + # This is lowest possible value + cpu: 0.01 + memory: 1Mi + config: + Authenticator: + allowed_users: &users + - roxyboy + - lesommer + - auraoupa + - yuvipanda + - choldgraf + - GeorgianaElena + admin_users: *users + + allowNamedServers: true + networkPolicy: + # FIXME: For dask gateway + enabled: false + readinessProbe: + enabled: false + dask-gateway: + extraConfig: + idle: | + # timeout after 30 minutes of inactivity + c.KubeClusterConfig.idle_timeout = 1800 + - name: prod + domain: meom-ige.2i2c.cloud + template: daskhub + auth0: + connection: github + config: *meomConfig diff --git a/deployer/__main__.py b/deployer/__main__.py index 70f2fabeb0..8dab3b3957 100644 --- a/deployer/__main__.py +++ b/deployer/__main__.py @@ -29,6 +29,23 @@ def build(cluster_name): cluster.build_image() +def deploy_support(cluster_name): + """ + Deploy support components to a cluster + """ + + # Validate our config with JSON Schema first before continuing + validate(cluster_name) + + + config_file_path = Path(os.getcwd()) / "config/hubs" / f'{cluster_name}.cluster.yaml' + with open(config_file_path) as f: + cluster = Cluster(yaml.load(f)) + + if cluster.support: + with cluster.auth(): + cluster.deploy_support() + def deploy(cluster_name, hub_name, skip_hub_health_test, config_path): """ Deploy one or more hubs in a given cluster @@ -97,6 +114,7 @@ def main(): build_parser = subparsers.add_parser("build") deploy_parser = subparsers.add_parser("deploy") validate_parser = subparsers.add_parser("validate") + deploy_support_parser = subparsers.add_parser("deploy-support") build_parser.add_argument("cluster_name") @@ -107,6 +125,8 @@ def main(): validate_parser.add_argument("cluster_name") + deploy_support_parser.add_argument("cluster_name") + args = argparser.parse_args() if args.action == "build": @@ -115,6 +135,8 @@ def main(): deploy(args.cluster_name, args.hub_name, args.skip_hub_health_test, args.config_path) elif args.action == 'validate': validate(args.cluster_name) + elif args.action == 'deploy-support': + deploy_support(args.cluster_name) else: # Print help message and exit when no arguments are passed # FIXME: Is there a better way to do this? 
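For context, here is a minimal, hypothetical sketch (not part of this PR) of how the new `deploy-support` subcommand might be exercised the way CI would, by shelling out to the deployer from the repository root. The cluster name `2i2c` is just an example; any `config/hubs/<name>.cluster.yaml` works, and the exact invocation path is an assumption.

```python
# Hypothetical invocation sketch for the deploy-support subcommand added above.
# Assumes it is run from the repository root, with helm and cloud credentials
# already configured, mirroring what a CI job would do.
import subprocess

def deploy_cluster_support(cluster_name: str) -> None:
    # Roughly equivalent to: python3 deployer deploy-support <cluster_name>
    # which validates config/hubs/<cluster_name>.cluster.yaml and, if a
    # `support:` block is present, installs cert-manager and the support chart.
    subprocess.check_call(["python3", "deployer", "deploy-support", cluster_name])

if __name__ == "__main__":
    deploy_cluster_support("2i2c")  # example cluster name from config/hubs/
```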
diff --git a/deployer/hub.py b/deployer/hub.py index 5c020d1b10..b418839813 100644 --- a/deployer/hub.py +++ b/deployer/hub.py @@ -29,6 +29,7 @@ def __init__(self, spec): Hub(self, hub_yaml) for hub_yaml in self.spec['hubs'] ] + self.support = self.spec.get('support', {}) def build_image(self): self.ensure_docker_credhelpers() @@ -77,6 +78,32 @@ def ensure_docker_credhelpers(self): with open(dockercfg_path, 'w') as f: json.dump(config, f, indent=4) + def deploy_support(self): + cert_manager_version = 'v1.3.1' + + print("Provisioning cert-manager...") + subprocess.check_call([ + 'helm', 'upgrade', '--install', '--create-namespace', + '--namespace', 'cert-manager', + 'cert-manager', 'jetstack/cert-manager', + '--version', cert_manager_version, + '--set', 'installCRDs=true' + ]) + print("Done!") + + print("Support charts...") + + with tempfile.NamedTemporaryFile(mode='w') as f: + yaml.dump(self.support.get('config', {}), f) + f.flush() + subprocess.check_call([ + 'helm', 'upgrade', '--install', '--create-namespace', + '--namespace', 'support', + 'support', 'support', + '-f', f.name, + '--wait' + ]) + print("Done!") def auth_kubeconfig(self): """ diff --git a/docs/conf.py b/docs/conf.py index c731b4d8e3..2f3d86dc18 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -62,43 +62,48 @@ from yaml import safe_load import pandas as pd from pathlib import Path - -# Grab the latest list of clusters defined in pilot-hubs/ -clusters = Path("../config/hubs").glob("*") -# Add list of repos managed outside pilot-hubs -hub_list = [{ - 'name': 'University of Toronto', - 'domain': 'jupyter.utoronto.ca', - 'id': 'utoronto', - 'template': 'base-hub ([deployment repo](https://github.com/utoronto-2i2c/jupyterhub-deploy/))' -}] -for cluster_info in clusters: - if "schema" in cluster_info.name: - continue - # For each cluster, grab it's YAML w/ the config for each hub - yaml = cluster_info.read_text() - cluster = safe_load(yaml) - - # For each hub in cluster, grab its metadata and add it to the list - for hub in cluster['hubs']: - config = hub['config'] - # Config is sometimes nested - if 'basehub' in config: - hub_config = config['basehub']['jupyterhub'] - else: - hub_config = config['jupyterhub'] - # Domain can be a list - if isinstance(hub['domain'], list): - hub['domain'] = hub['domain'][0] - - hub_list.append({ - 'name': hub_config['custom']['homepage']['templateVars']['org']['name'], - 'domain': f"[{hub['domain']}](https://{hub['domain']})", - "id": hub['name'], - "template": hub['template'], - }) -df = pd.DataFrame(hub_list) -path_tmp = Path("tmp") -path_tmp.mkdir(exist_ok=True) -path_table = path_tmp / "hub-table.csv" -df.to_csv(path_table, index=None) \ No newline at end of file +import subprocess + +def render_hubs(): + # Grab the latest list of clusters defined in pilot-hubs/ + clusters = Path("../config/hubs").glob("*") + # Add list of repos managed outside pilot-hubs + hub_list = [{ + 'name': 'University of Toronto', + 'domain': 'jupyter.utoronto.ca', + 'id': 'utoronto', + 'template': 'base-hub ([deployment repo](https://github.com/utoronto-2i2c/jupyterhub-deploy/))' + }] + for cluster_info in clusters: + if "schema" in cluster_info.name: + continue + # For each cluster, grab it's YAML w/ the config for each hub + yaml = cluster_info.read_text() + cluster = safe_load(yaml) + + # For each hub in cluster, grab its metadata and add it to the list + for hub in cluster['hubs']: + config = hub['config'] + # Config is sometimes nested + if 'basehub' in config: + hub_config = config['basehub']['jupyterhub'] + 
else: + hub_config = config['jupyterhub'] + # Domain can be a list + if isinstance(hub['domain'], list): + hub['domain'] = hub['domain'][0] + + hub_list.append({ + 'name': hub_config['custom']['homepage']['templateVars']['org']['name'], + 'domain': f"[{hub['domain']}](https://{hub['domain']})", + "id": hub['name'], + "template": hub['template'], + }) + df = pd.DataFrame(hub_list) + path_tmp = Path("tmp") + path_tmp.mkdir(exist_ok=True) + path_table = path_tmp / "hub-table.csv" + df.to_csv(path_table, index=None) + + +render_hubs() \ No newline at end of file diff --git a/docs/environment.yml b/docs/environment.yml new file mode 100644 index 0000000000..fda88096a9 --- /dev/null +++ b/docs/environment.yml @@ -0,0 +1,14 @@ +channels: +- conda-forge +dependencies: +- go-terraform-docs +- pip +- python=3.8 +- pip: + - myst-parser[sphinx,linkify] + - sphinx-book-theme + - sphinx-panels + - sphinx-autobuild + - pandas + - pyyaml + - requests diff --git a/docs/index.md b/docs/index.md index ec6b28e3c8..75e6818b6e 100644 --- a/docs/index.md +++ b/docs/index.md @@ -42,6 +42,8 @@ Topic guides go more in-depth on a particular topic. topic/config.md topic/hub-templates.md topic/storage-layer.md +topic/terraform.md +topic/cluster-design.md ``` ## Reference diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 3883cb447e..0000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ -myst-parser[sphinx,linkify] -sphinx-book-theme -sphinx-panels -sphinx-autobuild -pandas -pyyaml -requests diff --git a/docs/topic/cluster-design.md b/docs/topic/cluster-design.md new file mode 100644 index 0000000000..d9e9a5a835 --- /dev/null +++ b/docs/topic/cluster-design.md @@ -0,0 +1,73 @@ +# Cluster design considerations + +## Core node size + +In each cluster, we have a *core node pool* that is fairly static in size +and always running. It needs enough capacity to run: + +1. Kubernetes system components - network policy enforcement, config connector + components, cluster autoscaler, kube-dns, etc. + +2. Per-cluster support components - like prometheus, grafana, cert-manager, + etc. + +3. Hub core components - the hub, proxy, user scheduler, etc. + +4. (Optional) Dask gateway core components - the API gateway, controller, etc. + +Since the core nodes are *always running*, they form a big chunk of the +cluster's *base cost* - the amount of money it costs each day, regardless +of the current number of running users. Picking an appropriate node size and +count here has a big effect. + +### On GKE + +GKE makes sizing this node pool difficult, since `kube-system` components can take up quite +a bit of resources. Even though the kind of clusters we run will most likely +not stress components like `kube-dns` that much, there's no option to give +them smaller resource requests. So this will be our primary limitation in +many ways. + +Adding [Config Connector](https://cloud.google.com/config-connector/docs/overview) +or enabling [Network Policy](https://cloud.google.com/kubernetes-engine/docs/how-to/network-policy) +requires more resources as well. + +Based on some loosely structured experimentation, the current recommendation is to run +3 `g1-small` instances for a cluster without config connector or network policy, +or a single `n1-highmem-4` instance for a cluster with either of those options +turned on. This needs to be better investigated.
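To make the sizing trade-off above more concrete, here is a rough, illustrative sketch (not part of this PR) that estimates how much memory is left on a candidate core node after GKE's documented kubelet/system reservations and eviction threshold. The machine sizes and reservation tiers follow GKE's published formula, but treat the numbers as approximations rather than capacity planning; real allocatable values should be read from `kubectl describe node`.

```python
# Rough estimate of allocatable memory on candidate core nodes, using the
# memory reservation tiers GKE documents for kubelet/system daemons plus the
# 100 MiB hard eviction threshold. Illustrative only.

GIB = 1024  # work in MiB

def gke_reserved_memory_mib(total_mib: float) -> float:
    if total_mib < 1 * GIB:
        return 255
    tiers = [
        (4 * GIB, 0.25),    # 25% of the first 4 GiB
        (4 * GIB, 0.20),    # 20% of the next 4 GiB (up to 8 GiB)
        (8 * GIB, 0.10),    # 10% of the next 8 GiB (up to 16 GiB)
        (112 * GIB, 0.06),  # 6% of the next 112 GiB (up to 128 GiB)
    ]
    reserved, remaining = 0.0, total_mib
    for size, fraction in tiers:
        step = min(remaining, size)
        reserved += step * fraction
        remaining -= step
    reserved += remaining * 0.02  # 2% of anything above 128 GiB
    return reserved + 100         # plus the eviction threshold

# Approximate physical memory of the machine types discussed above
candidates_mib = {"g1-small": 1.7 * GIB, "n1-highmem-4": 26 * GIB}
for machine, total in candidates_mib.items():
    allocatable = total - gke_reserved_memory_mib(total)
    print(f"{machine}: ~{allocatable / GIB:.1f} GiB allocatable for kube-system, "
          "support charts and hub core pods")
```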
+ +## Network Policy + +When hubs belonging to multiple organizations are run on the same cluster, +we **must** enable [NetworkPolicy enforcement](https://cloud.google.com/kubernetes-engine/docs/how-to/network-policy) +to isolate them from each other. + +## Cloud access credentials for hub users + +For hub users to access cloud resources (like storage buckets), they will need +to be authorized via a [GCP ServiceAccount](https://cloud.google.com/iam/docs/service-accounts). +This is different from a [Kubernetes ServiceAccount](https://kubernetes.io/docs/tasks/configure-pod-container/configure-service-account/), +which is used to authenticate and authorize access to kubernetes resources (like spawning pods). + +For dask hubs, we want to provide users with write access to at least one storage +bucket they can use for temporary data storage. User pods need to be given access to +a GCP ServiceAccount that has write permissions to this bucket. There are two ways +to do this: + +1. Provide appropriate permissions to the GCP ServiceAccount used by the node the user + pods are running on. When used with [Metadata Concealment](https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata#overview), + user pods can read from / write to storage buckets. However, this grants the same permissions + to *all* pods on the cluster, and hence is unsuitable for clusters with multiple + hubs running for different organizations. + +2. Use the [GKE Cloud Config Connector](https://cloud.google.com/config-connector/docs/overview) to + create a GCP ServiceAccount + Storage Bucket for each hub via helm. This requires using + [Workload Identity](https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity) and + is incompatible with (1). This is required for multi-tenant clusters, since users on a hub + get much more tightly scoped permissions. + +Long-term, (2) is the appropriate way to do this for everyone. However, it affects the size +of the core node pool, since it runs some additional components in the cluster. For now, we use (1) for +single-tenant clusters, and (2) for multi-tenant clusters. If nobody wants a scratch GCS bucket, +neither option is required. diff --git a/docs/topic/terraform.md b/docs/topic/terraform.md new file mode 100644 index 0000000000..3ff13cbc54 --- /dev/null +++ b/docs/topic/terraform.md @@ -0,0 +1,16 @@ +# Terraform + +[Terraform](https://www.terraform.io/) is used to manage our infrastructure +on Google Cloud Platform. The source files are under `terraform/` in this repo, +and variables defining each cluster we manage are under `terraform/projects`. + +## Workspaces + +We use [terraform workspaces](https://www.terraform.io/docs/language/state/workspaces.html) +to maintain separate terraform state for each cluster we manage. +There should be one workspace per cluster, named after the `.tfvars` +file that holds that cluster's variable definitions. + +Workspaces are stored centrally in the `two-eye-two-see-org` GCP project, even +when we use Terraform for projects running on AWS / Azure. You must have +access to this project before you can use terraform for our infrastructure.
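As a concrete illustration of the workspace-per-cluster convention described above, a small helper along these lines (not part of this PR; `meom-ige` is just the example cluster added here) selects the matching workspace before planning or applying a project's `.tfvars` file:

```python
# Illustrative helper for the workspace-per-cluster convention: create/select
# the terraform workspace named after a projects/<cluster>.tfvars file, then
# plan or apply with that variable file. Run from the terraform/ directory.
import subprocess

def terraform_run(cluster: str, apply: bool = False) -> None:
    # Create the workspace if it doesn't exist yet (error is harmless if it does),
    # then switch to it so state stays separate per cluster.
    subprocess.run(["terraform", "workspace", "new", cluster], check=False)
    subprocess.check_call(["terraform", "workspace", "select", cluster])
    action = "apply" if apply else "plan"
    subprocess.check_call(["terraform", action, f"-var-file=projects/{cluster}.tfvars"])

if __name__ == "__main__":
    terraform_run("meom-ige")  # plan only; pass apply=True to make changes
```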
\ No newline at end of file diff --git a/hub-templates/basehub/values.yaml b/hub-templates/basehub/values.yaml index 60a41b75e0..0b9b9a6258 100644 --- a/hub-templates/basehub/values.yaml +++ b/hub-templates/basehub/values.yaml @@ -86,6 +86,8 @@ jupyterhub: limits: memory: 1Gi traefik: + image: + tag: v2.4.8 nodeSelector: hub.jupyter.org/node-purpose: core resources: diff --git a/support/Chart.yaml b/support/Chart.yaml index 0e8d264acf..9d50e68e2a 100644 --- a/support/Chart.yaml +++ b/support/Chart.yaml @@ -7,24 +7,18 @@ dependencies: # Prometheus for collection of metrics. # https://github.com/prometheus-community/helm-charts/tree/main/charts/prometheus - name: prometheus - version: 11.15.0 + version: 14.1.1 repository: https://prometheus-community.github.io/helm-charts # Grafana for dashboarding of metrics. # https://github.com/grafana/helm-charts/tree/main/charts/grafana - name: grafana - version: 5.6.4 + version: 6.11.0 repository: https://grafana.github.io/helm-charts # ingress-nginx for a k8s Ingress resource controller that routes traffic from # a single IP entrypoint to various services exposed via k8s Ingress resources # that references this controller. - name: ingress-nginx - version: 2.15.0 - repository: https://kubernetes.github.io/ingress-nginx - - # cert-manager for acquisition of TLS certificates - # https://github.com/jetstack/cert-manager/tree/master/deploy/charts/cert-manager - - name: cert-manager - version: v1.0.0-beta.1 - repository: https://charts.jetstack.io + version: 3.33.0 + repository: https://kubernetes.github.io/ingress-nginx \ No newline at end of file diff --git a/support/values.yaml b/support/values.yaml index f9f002143a..01aed37a8c 100644 --- a/support/values.yaml +++ b/support/values.yaml @@ -15,18 +15,20 @@ prometheus: # Deploy onto user nodes key: hub.jupyter.org_dedicated value: user + - effect: NoSchedule + # Deploy onto dask worker nodes + key: k8s.dask.org_dedicated + value: worker updateStrategy: type: RollingUpdate pushgateway: enabled: false - rbac: - create: true server: resources: # Without this, prometheus can easily starve users requests: cpu: 0.2 - memory: 768Mi + memory: 512Mi limits: cpu: 1 memory: 2Gi @@ -55,12 +57,7 @@ grafana: annotations: kubernetes.io/ingress.class: nginx cert-manager.io/cluster-issuer: letsencrypt-prod - hosts: - - grafana.pilot.2i2c.cloud - # grafana.ini: - # server: - # root_url: http://grafana.datahub.berkeley.edu/ datasources: datasources.yaml: apiVersion: 1 diff --git a/terraform/.terraform-docs.yml b/terraform/.terraform-docs.yml new file mode 100644 index 0000000000..f6449a2c65 --- /dev/null +++ b/terraform/.terraform-docs.yml @@ -0,0 +1,9 @@ +output: + mode: replace + template: |- + # Reference + + + + {{ .Content }} + \ No newline at end of file diff --git a/terraform/buckets.tf b/terraform/buckets.tf new file mode 100644 index 0000000000..32904d1184 --- /dev/null +++ b/terraform/buckets.tf @@ -0,0 +1,18 @@ +/** +* GCS buckets for use by hub users +*/ + +resource "google_storage_bucket" "user_buckets" { + for_each = var.user_buckets + name = "${var.prefix}-${each.key}" + location = var.region + project = var.project_id +} + +resource "google_storage_bucket_iam_member" "member" { + + for_each = var.user_buckets + bucket = google_storage_bucket.user_buckets[each.key].name + role = "roles/storage.admin" + member = "serviceAccount:${google_service_account.cluster_sa.email}" +} diff --git a/terraform/cd.tf b/terraform/cd.tf new file mode 100644 index 0000000000..7aedfbd6fc --- /dev/null +++ b/terraform/cd.tf @@ -0,0 +1,31
@@ +/** +* Set up Service Accounts for authentication during continuous deployment +*/ + +// Service account used by GitHub Actions to deploy to the cluster +resource "google_service_account" "cd_sa" { + account_id = "${var.prefix}-cd-sa" + display_name = "Continuous Deployment SA for ${var.prefix}" + project = var.project_id +} + +// Roles the service account needs to deploy hubs to the cluster +resource "google_project_iam_member" "cd_sa_roles" { + for_each = var.cd_sa_roles + + project = var.project_id + role = each.value + member = "serviceAccount:${google_service_account.cd_sa.email}" +} + +// JSON encoded private key to be kept in secrets/* for the +// deployment script to authenticate to the cluster +resource "google_service_account_key" "cd_sa" { + service_account_id = google_service_account.cd_sa.name + public_key_type = "TYPE_X509_PEM_FILE" +} + +output "ci_deployer_key" { + value = base64decode(google_service_account_key.cd_sa.private_key) + sensitive = true +} diff --git a/terraform/cluster.tf b/terraform/cluster.tf new file mode 100644 index 0000000000..fa8534f3b3 --- /dev/null +++ b/terraform/cluster.tf @@ -0,0 +1,223 @@ +resource "google_container_cluster" "cluster" { + # config_connector_config is in beta + provider = google-beta + + name = "${var.prefix}-cluster" + location = var.zone + project = var.project_id + + initial_node_count = 1 + remove_default_node_pool = true + + addons_config { + http_load_balancing { + // FIXME: This used to not work well with websockets, and + // cost extra money as well. Let's validate if this is still + // true? + disabled = true + } + horizontal_pod_autoscaling { + // This isn't used anywhere, so let's turn this off + disabled = true + } + config_connector_config { + enabled = var.config_connector_enabled + } + } + + dynamic "workload_identity_config" { + # Set up workload identity only if we're using config connector, otherwise + # just metadata concealment is used + for_each = var.config_connector_enabled == "" ? [] : [1] + content { + identity_namespace = "${var.project_id}.svc.id.goog" + } + } + + release_channel { + # We upgrade clusters manually so we can manage downtime of + # master *and* nodes. When a cluster is in a release channel, + # upgrades (including disruptive node upgrades) happen automatically. + # So we disable it. + channel = "UNSPECIFIED" + } + + cluster_autoscaling { + # This disables node autoprovisioning, not cluster autoscaling! + enabled = false + # Use a scheduler + autoscaling profile optimized for batch workloads like ours + # https://cloud.google.com/kubernetes-engine/docs/concepts/cluster-autoscaler#autoscaling_profiles + autoscaling_profile = "OPTIMIZE_UTILIZATION" + } + + network_policy { + enabled = var.enable_network_policy + } + + node_config { + # DO NOT TOUCH THIS BLOCK - changing it forces the entire cluster to be recreated + service_account = google_service_account.cluster_sa.email + } +} + +resource "google_container_node_pool" "core" { + name = "core-pool" + cluster = google_container_cluster.cluster.name + project = google_container_cluster.cluster.project + location = google_container_cluster.cluster.location + + + initial_node_count = 1 + autoscaling { + min_node_count = 1 + max_node_count = var.core_node_max_count + } + + management { + auto_repair = true + # Auto upgrade will drain and set up nodes without us knowing, + # and this can cause outages when it hits the proxy nodes.
+ auto_upgrade = false + } + + + node_config { + labels = { + "hub.jupyter.org/node-purpose" = "core", + "k8s.dask.org/node-purpose" = "core" + } + machine_type = var.core_node_machine_type + disk_size_gb = 30 + + # Our service account gets all OAuth scopes so it can access + # all APIs, but only fine grained permissions + roles are + # granted via the service account. This follows Google's + # recommendation at https://cloud.google.com/compute/docs/access/service-accounts#associating_a_service_account_to_an_instance + service_account = google_service_account.cluster_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} + +resource "google_container_node_pool" "notebook" { + name = "nb-${each.key}" + cluster = google_container_cluster.cluster.name + project = google_container_cluster.cluster.project + location = google_container_cluster.cluster.location + + for_each = var.notebook_nodes + + # WARNING: Do not change this value, it will cause the nodepool + # to be destroyed & re-created. If you want to increase number of + # nodes in a node pool, set the min count to that number and then + # scale the pool manually. + initial_node_count = each.value.min + autoscaling { + min_node_count = each.value.min + max_node_count = each.value.max + } + + management { + auto_repair = true + auto_upgrade = false + } + + + node_config { + workload_metadata_config { + # Config Connector requires workload identity to be enabled (via GKE_METADATA_SERVER). + # If config connector is not necessary, we use simple metadata concealment + # (https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata) + # to expose the node CA to users safely. + # FIXME: This should be a bit more fine-grained - it should be possible to disable + # config connector and completely hide all node metadata from user pods + node_metadata = var.config_connector_enabled ? "GKE_METADATA_SERVER" : "SECURE" + } + labels = { + # Notebook pods and dask schedulers can exist here + "hub.jupyter.org/node-purpose" = "user", + "k8s.dask.org/node-purpose" = "scheduler", + } + + taint = [{ + key = "hub.jupyter.org_dedicated" + value = "user" + effect = "NO_SCHEDULE" + }] + machine_type = each.value.machine_type + + # Our service account gets all OAuth scopes so it can access + # all APIs, but only fine grained permissions + roles are + # granted via the service account. This follows Google's + # recommendation at https://cloud.google.com/compute/docs/access/service-accounts#associating_a_service_account_to_an_instance + service_account = google_service_account.cluster_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} + +resource "google_container_node_pool" "dask_worker" { + name = "dask-${each.key}" + cluster = google_container_cluster.cluster.name + project = google_container_cluster.cluster.project + location = google_container_cluster.cluster.location + + # Default to same config as notebook nodepools config + for_each = length(var.dask_nodes) == 0 ? var.notebook_nodes : var.dask_nodes + + # WARNING: Do not change this value, it will cause the nodepool + # to be destroyed & re-created. If you want to increase number of + # nodes in a node pool, set the min count to that number and then + # scale the pool manually. 
+ initial_node_count = 0 + autoscaling { + min_node_count = each.value.min + max_node_count = each.value.max + } + + management { + auto_repair = true + auto_upgrade = false + } + + node_config { + + preemptible = true + # SSD Disks for dask workers make image pulls much faster + # Since we might have many dask workers spinning up at the + # same time, the extra cost of using this is probably worth it. + disk_type = "pd-ssd" + + workload_metadata_config { + # Config Connector requires workload identity to be enabled (via GKE_METADATA_SERVER). + # If config connector is not necessary, we use simple metadata concealment + # (https://cloud.google.com/kubernetes-engine/docs/how-to/protecting-cluster-metadata) + # to expose the node CA to users safely. + # FIXME: This should be a bit more fine-grained - it should be possible to disable + # config connector and completely hide all node metadata from user pods + node_metadata = var.config_connector_enabled ? "GKE_METADATA_SERVER" : "SECURE" + } + labels = { + "k8s.dask.org/node-purpose" = "worker", + } + + taint = [{ + key = "k8s.dask.org_dedicated" + value = "worker" + effect = "NO_SCHEDULE" + }] + machine_type = each.value.machine_type + + # Our service account gets all OAuth scopes so it can access + # all APIs, but only fine grained permissions + roles are + # granted via the service account. This follows Google's + # recommendation at https://cloud.google.com/compute/docs/access/service-accounts#associating_a_service_account_to_an_instance + service_account = google_service_account.cluster_sa.email + oauth_scopes = [ + "https://www.googleapis.com/auth/cloud-platform" + ] + } +} + diff --git a/terraform/main.tf b/terraform/main.tf index b9f011a634..b134f1fe7e 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -5,155 +5,37 @@ terraform { } } -module "service_accounts" { - source = "terraform-google-modules/service-accounts/google" - version = "~> 2.0" - project_id = var.project_id - prefix = var.prefix - generate_keys = true - names = ["cd-sa"] - project_roles = [ - "${var.project_id}=>roles/container.admin", - "${var.project_id}=>roles/artifactregistry.writer", - # FIXME: This is way too much perms just to ssh into a node - "${var.project_id}=>roles/compute.instanceAdmin.v1" - ] +// Service account used by all the nodes and pods in our cluster +resource "google_service_account" "cluster_sa" { + account_id = "${var.prefix}-cluster-sa" + display_name = "Cluster SA for ${var.prefix}" + project = var.project_id } -output "ci_deployer_key" { - value = module.service_accounts.keys["cd-sa"] - sensitive = true +// To access GCS buckets with requester pays, the calling code needs +// to have serviceusage.services.use permission. We create a role +// granting just this permission to the cluster SA, so user pods can +// use it. See https://cloud.google.com/storage/docs/requester-pays +// for more info +resource "google_project_iam_custom_role" "identify_project_role" { + // Role names can't contain -, so we swap them out.
BOO + role_id = replace("${var.prefix}_user_sa_role", "-", "_") + project = var.project_id + title = "Identify as project role for users in ${var.prefix}" + description = "Minimal role for hub users on ${var.prefix} to identify as current project" + permissions = ["serviceusage.services.use"] } -resource "google_artifact_registry_repository" "container_repository" { - provider = google-beta - - location = var.region - repository_id = "low-touch-hubs" - format = "DOCKER" - project = var.project_id -} - -// Give the GKE service account access to our artifact registry docker repo -resource "google_project_iam_member" "project" { +resource "google_project_iam_member" "identify_project_binding" { project = var.project_id - role = "roles/artifactregistry.reader" - member = "serviceAccount:${module.gke.service_account}" + role = google_project_iam_custom_role.identify_project_role.name + member = "serviceAccount:${google_service_account.cluster_sa.email}" } +resource "google_project_iam_member" "cluster_sa_roles" { + for_each = var.cluster_sa_roles -module "gke" { - source = "terraform-google-modules/kubernetes-engine/google" - project_id = var.project_id - name = "${var.prefix}-cluster" - regional = var.regional_cluster - region = var.region - zones = [var.zone] - network = "default" - subnetwork = "default" - ip_range_pods = "" - ip_range_services = "" - http_load_balancing = false - horizontal_pod_autoscaling = false - network_policy = true - # We explicitly set up a core pool, so don't need the default - remove_default_node_pool = true - kubernetes_version = "1.19.9-gke.1400" - - - node_pools = [ - { - name = "core-pool" - machine_type = var.core_node_machine_type - min_count = 1 - max_count = var.core_node_max_count - local_ssd_count = 0 - disk_size_gb = var.core_node_disk_size_gb - disk_type = "pd-standard" - image_type = "COS" - auto_repair = true - auto_upgrade = false - preemptible = false - initial_node_count = 1 - # Let's pin this so we don't upgrade each time terraform runs - version = "1.19.9-gke.1400" - }, - { - name = "user-pool" - machine_type = var.user_node_machine_type - min_count = 0 - max_count = var.user_node_max_count - local_ssd_count = 0 - disk_size_gb = 100 - disk_type = "pd-ssd" - image_type = "COS" - auto_repair = true - auto_upgrade = false - preemptible = false - initial_node_count = 0 - # Let's pin this so we don't upgrade each time terraform runs - version = "1.19.9-gke.1400" - }, - { - name = "dask-worker-pool" - machine_type = var.dask_worker_machine_type - min_count = 0 - max_count = 10 - local_ssd_count = 0 - disk_size_gb = 100 - # Fast startup is important here, so we get fast SSD disks - # This pulls in user images much faster - disk_type = "pd-ssd" - image_type = "COS" - auto_repair = true - auto_upgrade = false - preemptible = true - initial_node_count = 0 - # Let's pin this so we don't upgrade each time terraform runs - version = "1.19.9-gke.1400" - }, - ] - - node_pools_oauth_scopes = { - all = [ - # FIXME: Is this the minimal? 
- # - "https://www.googleapis.com/auth/cloud-platform", - ] - } - - node_pools_labels = { - all = {} - - core-pool = { - default-node-pool = true - "hub.jupyter.org/pool-name" = "core-pool", - "hub.jupyter.org/node-purpose" = "core", - "k8s.dask.org/node-purpose" = "core" - } - user-pool = { - "hub.jupyter.org/pool-name" = "user-pool" - "hub.jupyter.org/node-purpose" = "user", - "k8s.dask.org/node-purpose" = "scheduler" - } - dask-worker-pool = { - "hub.jupyter.org/pool-name" = "dask-worker-pool" - "k8s.dask.org/node-purpose" = "worker" - } - } - - node_pools_taints = { - all = [] - - user-pool = [{ - key = "hub.jupyter.org_dedicated" - value = "user" - effect = "NO_SCHEDULE" - }] - dask-worker-pool = [{ - key = "k8s.dask.org_dedicated" - value = "worker" - effect = "NO_SCHEDULE" - }] - } + project = var.project_id + role = each.value + member = "serviceAccount:${google_service_account.cluster_sa.email}" } diff --git a/terraform/cloudbank.tfvars b/terraform/projects/cloudbank.tfvars similarity index 66% rename from terraform/cloudbank.tfvars rename to terraform/projects/cloudbank.tfvars index 7235294676..123d8e54ae 100644 --- a/terraform/cloudbank.tfvars +++ b/terraform/projects/cloudbank.tfvars @@ -3,7 +3,11 @@ project_id = "cb-1003-1696" core_node_machine_type = "n1-highmem-4" +# Multi-tenant cluster, network policy is required to enforce separation between hubs enable_network_policy = true + +# No plans to provide storage buckets to users on this hub, so no need to deploy +# config connector config_connector_enabled = false notebook_nodes = { diff --git a/terraform/hackathon-2i2c-project-alpha.tfvars b/terraform/projects/hackathon-2i2c-project-alpha.tfvars similarity index 100% rename from terraform/hackathon-2i2c-project-alpha.tfvars rename to terraform/projects/hackathon-2i2c-project-alpha.tfvars diff --git a/terraform/projects/meom-ige.tfvars b/terraform/projects/meom-ige.tfvars new file mode 100644 index 0000000000..c95fdd06fa --- /dev/null +++ b/terraform/projects/meom-ige.tfvars @@ -0,0 +1,55 @@ +prefix = "meom-ige" +project_id = "meom-ige-cnrs" + +# Minimum number of nodes required to fit kube-system is either +# 2 n1-highcpu-2 nodes, or 3 g1-small nodes. If you don't enable +# networkpolicy, you can get away with 1 n1-custom-4-3840 node - +# but with that enable, calico-typha wants 2 replicas that +# must run on two nodes since they both want the same hostport. +# 3 g1-small is 13$ a month, wile a single n2-highcpu-2 is +# already 36$ a month. We want very low base price, and +# our core nodes will barely see any CPU usage, so g1-small is +# the way to go +core_node_machine_type = "g1-small" + +# Single-tenant cluster, network policy not needed +enable_network_policy = false + +# Single tenant cluster, so bucket access is provided via +# metadata concealment + node SA. Config Connector not needed. 
+config_connector_enabled = false + +notebook_nodes = { + "small" : { + min : 0, + max : 20, + machine_type : "e2-standard-2" + }, + "medium" : { + min : 0, + max : 20, + machine_type : "e2-standard-8" + }, + "large" : { + min : 0, + max : 20, + machine_type : "e2-standard-16" + }, + "very-large" : { + min : 0, + max : 20, + machine_type : "e2-standard-32" + }, + "huge" : { + min : 0, + max : 20, + # e2 instances only go up to 32 cores + machine_type : "n2-standard-64" + }, + +} + +user_buckets = [ + "scratch", + "data" +] \ No newline at end of file diff --git a/terraform/pilot-hubs.tfvars b/terraform/projects/pilot-hubs.tfvars similarity index 69% rename from terraform/pilot-hubs.tfvars rename to terraform/projects/pilot-hubs.tfvars index 2ed7f3a47e..4d566b6b83 100644 --- a/terraform/pilot-hubs.tfvars +++ b/terraform/projects/pilot-hubs.tfvars @@ -3,7 +3,10 @@ project_id = "two-eye-two-see" core_node_machine_type = "n1-highmem-4" +# Multi-tenant cluster, network policy is required to enforce separation between hubs enable_network_policy = true + +# Some hubs want a storage bucket, so we need to have config connector enabled config_connector_enabled = true notebook_nodes = { diff --git a/terraform/registry.tf b/terraform/registry.tf new file mode 100644 index 0000000000..29c8e3975a --- /dev/null +++ b/terraform/registry.tf @@ -0,0 +1,13 @@ +/** +* Artifact Registry to store user images for this cluster. +* +* Hosting it in the same project makes node startup time faster. +*/ +resource "google_artifact_registry_repository" "registry" { + provider = google-beta + + location = var.region + repository_id = "${var.prefix}-registry" + format = "DOCKER" + project = var.project_id +} diff --git a/terraform/variables.tf b/terraform/variables.tf index eeb8d51a09..2d8704e04b 100644 --- a/terraform/variables.tf +++ b/terraform/variables.tf @@ -1,54 +1,173 @@ variable "prefix" { - type = string + type = string + description = <<-EOT + Prefix for all objects created by terraform. + + Primary identifier to 'group' together resources created by + this terraform module. Prevents clashes with other resources + in the cloud project / account. + + Should not be changed after first terraform apply - doing so + will recreate all resources. + + Should not end with a '-'; that is added automatically. + EOT } variable "project_id" { - type = string - # This is in Toronto! - default = "two-eye-two-see" + type = string + description = <<-EOT + GCP Project ID to create resources in. + + Should be the id, rather than the display name, of the project. + EOT +} + +variable "notebook_nodes" { + type = map(map(string)) + description = "Notebook node pools to create" + default = {} +} + +variable "dask_nodes" { + type = map(map(string)) + description = "Dask node pools to create. Defaults to notebook_nodes" + default = {} +} + +variable "config_connector_enabled" { + type = bool + default = false + description = <<-EOT + Enable GKE Config Connector to manage GCP resources via kubernetes. + + GKE Config Connector (https://cloud.google.com/config-connector/docs/overview) + allows creating GCP resources (like buckets, VMs, etc) by creating Kubernetes + Custom Resources. We use this to create buckets on a per-hub level, + and could use it for other purposes in the future. + + Enabling this increases base cost, as config connector related pods + need to run on the cluster.
+ EOT +} + +variable "cluster_sa_roles" { + type = set(string) + default = [ + "roles/logging.logWriter", + "roles/monitoring.metricWriter", + "roles/monitoring.viewer", + "roles/stackdriver.resourceMetadata.writer", + "roles/artifactregistry.reader" + ] + description = <<-EOT + List of roles granted to the SA assumed by cluster nodes. + + The defaults grant just enough access for the components on the node + to write metrics & logs to stackdriver, and pull images from artifact registry. + + https://cloud.google.com/kubernetes-engine/docs/how-to/hardening-your-cluster + has more information. + EOT +} + +variable "cd_sa_roles" { + type = set(string) + default = [ + "roles/container.admin", + "roles/artifactregistry.writer" + ] + description = <<-EOT + List of roles granted to the SA used by our CI/CD pipeline. + + We want to automatically build / push images, and deploy to + the kubernetes cluster from CI/CD (on GitHub Actions, mostly). + A JSON key for this will be generated (with + `terraform output -raw ci_deployer_key`) and stored in the + repo in encrypted form. + + The default provides *full* access to the entire kubernetes + cluster! This is dangerous, but it is unclear how to tamp + it down. + EOT } variable "region" { - type = string - default = "us-central1" + type = string + default = "us-central1" + description = <<-EOT + GCP Region the cluster & resources will be placed in. + + For research clusters, this should be closest to where + your source data is. + + This does not imply that the cluster will be a regional + cluster. + EOT + } variable "zone" { - type = string - default = "us-central1-b" -} + type = string + default = "us-central1-b" + description = <<-EOT + GCP Zone the cluster & nodes will be set up in. -variable "regional_cluster" { - type = string - default = "false" + Even with a regional cluster, all the cluster nodes will + be in a single zone. NFS and supporting VMs will need to + be in this zone as well. + EOT } variable "core_node_machine_type" { - type = string - default = "n1-highmem-4" + type = string + default = "g1-small" + description = <<-EOT + Machine type to use for core nodes. + + Core nodes will always be on, and count as 'base cost' + for a cluster. We should try to run with as few of them + as possible. + + For single-tenant clusters, a single g1-small node seems + enough - if network policy and config connector are not on. + For others, please experiment to see what fits. + EOT } variable "core_node_max_count" { - type = number - default = 5 -} + type = number + default = 5 + description = <<-EOT + Maximum number of core nodes available. -variable "core_node_disk_size_gb" { - type = number - default = 50 -} + Core nodes can scale up to this many nodes if necessary. + They are part of the 'base cost', and should be kept to a minimum. + This number should be small enough to prevent runaway scaling, + but large enough to support occasional spikes for whatever reason. -variable "user_node_machine_type" { - type = string - default = "n1-standard-4" + Minimum node count is fixed at 1. + EOT } -variable "user_node_max_count" { - type = number - default = 10 +variable "enable_network_policy" { + type = bool + default = true + description = <<-EOT + Enable kubernetes network policy enforcement. + + Our z2jh deploys NetworkPolicies by default - but they are + not enforced unless enforcement is turned on here. This takes + up some cluster resources, so we could turn it off in cases + where we are trying to minimize base cost.
+ + https://cloud.google.com/kubernetes-engine/docs/how-to/network-policy + has more information. + EOT } -variable "dask_worker_machine_type" { - type = string - default = "e2-highmem-2" +variable "user_buckets" { + type = set(any) + default = [] + description = "Buckets to create for the project, they will be prefixed with {var.prefix}-" }
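Finally, to show how the `user_buckets` and `prefix` variables above connect back to the hub configuration earlier in this PR (the meom-ige hub sets `DATA_BUCKET` and `SCRATCH_BUCKET` environment variables pointing at the `meom-ige-data` / `meom-ige-scratch` buckets), here is a small illustrative sketch of how a user might consume those variables inside a notebook pod. The commented xarray call is only an example of a gcsfs-aware client, not something this PR installs.

```python
# Illustrative only: how the buckets created from `user_buckets` (prefixed with
# var.prefix, e.g. meom-ige-scratch / meom-ige-data) surface inside a user pod
# via the env vars set in config/hubs/meom-ige.cluster.yaml.
import os

# JupyterHub expands $(JUPYTERHUB_USER) at spawn time, so SCRATCH_BUCKET already
# points at a per-user prefix such as gcs://meom-ige-scratch/<username>.
scratch = os.environ.get("SCRATCH_BUCKET", "gcs://meom-ige-scratch/example-user")
data = os.environ.get("DATA_BUCKET", "gcs://meom-ige-data")

# Because the node SA is granted roles/storage.admin on these buckets
# (terraform/buckets.tf) and metadata concealment passes that identity through,
# any fsspec/gcsfs-aware library can read and write these locations, e.g.:
#   import xarray as xr
#   ds.to_zarr(f"{scratch}/my-simulation.zarr")
print(f"scratch location: {scratch}")
print(f"shared data location: {data}")
```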